def model_fn(features, labels, mode, params, config):
    is_training = True if mode == estimator.ModeKeys.TRAIN else False
    if mode != estimator.ModeKeys.PREDICT:
        features = self._parse_sequence_weight(features)
    features = self.sparse2dense(features, self._dataset.varlen_list)
    features = self._dense2sparse(features, self._dataset.varlen_list)
    network = self._Network(self._flags, self._dataset, 'input_layer')
    dense, embeddings = network.build_features(features)
    # Align the model structure with the MMoE paper
    dense = tf.concat(dense + [tf.squeeze(emb, [1]) for emb in embeddings], -1)
    dense = tf.keras.layers.Dense(dense.get_shape().as_list()[-1],
                                  activation=tf.nn.relu)(dense)
    assert self._flags.network == "dnn", \
        "If using the MMOE model, the expert's network type should be dnn"
    experts_out = tf.stack(
        [self._Network(self._flags, self._dataset, 'expert_{}'.format(i))(dense, None, is_training)
         for i in range(self._flags.num_experts)],
        axis=1
    )
    gates = self._build_gates(dense)
    predictions = self._build_predictions(experts_out, gates)
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode, predictions=predictions)

    losses = self._build_losses(labels, predictions)
    metrics = self._build_muti_task_metrics(losses, labels, predictions)
    self._build_summary(losses, metrics)
    loss = reduce(tf.add, losses.values())
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

    assert mode == estimator.ModeKeys.TRAIN
    train_op = self._build_train_op(loss)
    return estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
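# _build_gates and _build_predictions are referenced above but not shown here. A minimal
# sketch of the standard MMoE wiring they could implement follows; the tower width, task
# names and exact signatures are assumptions for illustration, not the original helpers.
def _build_gates_sketch(dense, num_experts, task_names):
    # One softmax gate per task, producing [Batch, num_experts] mixture weights.
    return {task: tf.keras.layers.Dense(num_experts, activation=tf.nn.softmax,
                                        name='gate_{}'.format(task))(dense)
            for task in task_names}


def _build_predictions_sketch(experts_out, gates):
    # experts_out: [Batch, num_experts, expert_dim]; gate: [Batch, num_experts]
    predictions = {}
    for task, gate in gates.items():
        mixed = tf.reduce_sum(experts_out * tf.expand_dims(gate, -1), axis=1)  # [Batch, expert_dim]
        tower = tf.keras.layers.Dense(32, activation=tf.nn.relu)(mixed)        # assumed tower size
        predictions[task] = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(tower)
    return predictions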
def model_fn(features, labels, mode, params, config):
    is_training = True if mode == estimator.ModeKeys.TRAIN else False
    if mode != estimator.ModeKeys.PREDICT:
        features = self._parse_sequence_weight(features)
    features = self.sparse2dense(features, self._dataset.varlen_list)
    features = self._dense2sparse(features, self._dataset.varlen_list)
    network = self._Network(self._flags, self._dataset, 'input_layer')
    dense, embeddings = network.build_features(features)
    experts_out = tf.stack([
        self._Network(self._flags, self._dataset, 'expert_{}'.format(i))(dense, embeddings, is_training)
        for i in range(self._flags.num_experts)
    ], axis=1)
    # Assign one learnable variable to each task and use it when computing the loss,
    # so that every task gets an individually weighted loss.
    # Reference implementation: https://blog.csdn.net/cdknight_happy/article/details/102618883
    gates = self._build_gates((dense, embeddings))
    predictions = self._build_predictions(experts_out, gates)
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode, predictions=predictions)

    losses = self._build_losses(labels, predictions)
    metrics = self._build_muti_task_metrics(losses, labels, predictions)
    self._build_summary(losses, metrics)
    log_vars = self._build_log_vars(self._dataset.task_types.keys())
    losses = self._build_var_losses(losses, log_vars)
    loss = reduce(tf.add, losses.values())
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

    assert mode == estimator.ModeKeys.TRAIN
    train_op = self._build_train_op(loss)
    return estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
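# _build_log_vars and _build_var_losses are not shown above. Based on the per-task variable
# comment and the linked reference, a plausible sketch is the homoscedastic-uncertainty
# weighting of Kendall et al.: one learnable log-variance per task. Treat this as an
# assumption about the helpers, not their actual code.
def _build_log_vars_sketch(task_names):
    return {task: tf.get_variable('log_var_{}'.format(task), shape=[],
                                  initializer=tf.zeros_initializer())
            for task in task_names}


def _build_var_losses_sketch(losses, log_vars):
    # loss_i' = exp(-log_var_i) * loss_i + log_var_i: noisy tasks are down-weighted while
    # the additive log_var term keeps the learned weights from collapsing to zero.
    return {task: tf.exp(-log_vars[task]) * task_loss + log_vars[task]
            for task, task_loss in losses.items()}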
def model_fn(features, labels, mode, params):
    # imei_emb_w = tf.get_variable("user_emb_w", [params['imei_count'], params['hidden_units']])
    attention_layers = [80, 40]
    mlp_layers = [100, 50, 20]
    pkg_w = tf.get_variable("i_item", [63002],
                            initializer=tf.constant_initializer(0.0))
    pkg_emb_w = tf.get_variable("i_id", [63002, params['embedding_size']],
                                initializer=tf.glorot_normal_initializer())
    pkgc_emb_w = tf.get_variable("i_cate", [802, params['embedding_size']],
                                 initializer=tf.glorot_normal_initializer())
    # pkg_emb_w = tf.get_variable("pkg_emb_w", [2360, params['embedding_size']])
    # pkgc_emb_w = tf.get_variable("pkgc_emb_w", [100, params['embedding_size']])
    # ssid_emb_w = tf.get_variable("ssid_emb_w", [100, params['embedding_size']])
    # soper_emb_w = tf.get_variable("soper_emb_w", [100, params['embedding_size']])
    i_b = tf.gather(pkg_w, features['i_id'])

    with tf.variable_scope("embedding_layer"):
        # imei_emb = tf.nn.embedding_lookup(imei_emb_w, features['imei_index'])
        pkg_emb = tf.nn.embedding_lookup(pkg_emb_w, features['i_id'])
        pkgc_emb = tf.nn.embedding_lookup(pkgc_emb_w, features['i_cate'])

    def _attention(feat_emb_w, hist_ids, item_emb):
        dense_ids = hist_ids
        dense_emb = tf.nn.embedding_lookup(feat_emb_w, dense_ids)  # None * P * K
        dense_mask = tf.expand_dims(tf.cast(dense_ids > 0, tf.float32), axis=-1)
        # dense_mask = tf.sequence_mask(dense_ids, ?)  # None * P
        padded_dim = tf.shape(dense_ids)[1]  # P
        hist_emb = tf.reshape(dense_emb, shape=[-1, params['embedding_size']])
        # None * K --> (None * P) * K; keep the same ordering as the dense_emb reshape
        query_emb = tf.reshape(tf.tile(item_emb, [1, padded_dim]),
                               shape=[-1, params['embedding_size']])
        att_net = tf.concat([
            hist_emb, query_emb, hist_emb * query_emb, hist_emb - query_emb
        ], axis=1)  # (None * P) * 4K
        for i in attention_layers:
            att_net = tf.layers.dense(att_net, units=i, activation=tf.nn.relu)
            # att_net = tf.layers.batch_normalization(att_net, training=(mode == estimator.ModeKeys.TRAIN))
            att_net = tf.layers.dropout(
                att_net,
                rate=params['dropout'],
                training=(mode == estimator.ModeKeys.TRAIN))
        att_wgt = tf.layers.dense(att_net, units=1, activation=None)
        att_wgt = tf.reshape(att_wgt, shape=[-1, padded_dim, 1])  # None * P * 1
        wgt_emb = tf.multiply(dense_emb, att_wgt)  # None * P * K
        # apply dense_mask so that padded positions do not contribute
        wgt_emb = tf.reduce_sum(tf.multiply(wgt_emb, dense_mask), 1)  # None * K
        return wgt_emb

    pkg_emb_h = _attention(pkg_emb_w, features['u_iid_seq'], pkg_emb)
    pkgc_emb_h = _attention(pkgc_emb_w, features['u_icat_seq'], pkgc_emb)

    with tf.variable_scope("mlp_layer"):
        net = tf.concat([pkg_emb, pkg_emb_h, pkgc_emb_h], axis=1)
        for units in mlp_layers:
            net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
            # net = tf.layers.batch_normalization(net, training=(mode == estimator.ModeKeys.TRAIN))
            net = tf.layers.dropout(
                net,
                rate=params['dropout'],
                training=(mode == estimator.ModeKeys.TRAIN))
        logits = tf.layers.dense(net, units=1, activation=None)

    logits = tf.reshape(logits, (-1,)) + i_b
    pred = tf.sigmoid(logits)
    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            estimator.export.PredictOutput(predictions)
    }
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       export_outputs=export_outputs)

    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                labels=tf.cast(labels, tf.float32)))
    # loss += 0.01 * tf.nn.l2_loss(pkg_emb_w)
    # loss += 0.01 * tf.nn.l2_loss(pkgc_emb_w)
    eval_metric_ops = {
        "AUC": tf.metrics.auc(labels, pred),
        'Accuracy': tf.metrics.accuracy(labels, predictions=tf.round(pred))
    }
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       eval_metric_ops=eval_metric_ops)

    optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    if mode == estimator.ModeKeys.TRAIN:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       train_op=train_op)
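# A hedged sketch of how the attention model_fn above might be driven through the Estimator
# API. Only the param keys (embedding_size, dropout, learning_rate) come from the code
# above; the model_dir, hyper-parameter values and input_fn are assumptions.
def build_din_estimator_sketch(train_input_fn, model_dir='./din_model'):
    din = estimator.Estimator(
        model_fn=model_fn,
        model_dir=model_dir,          # assumed path
        params={
            'embedding_size': 16,     # assumed
            'dropout': 0.5,           # assumed
            'learning_rate': 1e-3,    # assumed
        })
    din.train(input_fn=train_input_fn)
    return din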
def model_fn_default(features: Dict[str, tf.Tensor], labels: tf.Tensor,
                     mode: est.ModeKeys, params: Dict[str, Any]):
    """
    :param features:
    :param labels:
    :param mode:
    :param params:
    :return:
    """
    network_fn = params.get('network_fn')
    network_params = params.get('network_params')
    learning_rate = params.get('learning_rate', 1e-3)
    _ii = ['MSE', 'RMSE', 'MAE', 'MAPE']
    inspection_indicators = [i.upper() for i in params.get('inspection_indicators', _ii)]

    if len(features.keys()) == 1:
        fea = features['input_0']
    else:
        fea = [features[i] for i in features.keys()]

    network = network_fn(**network_params)
    network.summary()

    if mode == est.ModeKeys.PREDICT:
        predictions = network(fea, training=False)
        return est.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'result': est.export.PredictOutput(predictions)
            }
        )

    if mode == est.ModeKeys.TRAIN:
        predictions = network(fea, training=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        loss = tf.losses.mean_squared_error(labels, predictions)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_or_create_global_step())
        tf.identity(learning_rate, 'learning_rate')
        _get_indicators(labels, predictions, inspection_indicators)
        return est.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
        )

    if mode == est.ModeKeys.EVAL:
        predictions = network(fea, training=False)
        loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
        eval_metric_ops = _get_indicators(labels, predictions,
                                          inspection_indicators, training=False)
        return est.EstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metric_ops=eval_metric_ops
        )
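# _get_indicators is referenced above but not defined in this snippet. A hypothetical sketch
# of such a helper, built on tf.metrics for the MSE/RMSE/MAE/MAPE indicators the defaults
# name, is given below; the real helper's signature and behaviour may differ.
def _get_indicators_sketch(labels, predictions, indicators, training=True):
    ops = {}
    if 'MSE' in indicators:
        ops['MSE'] = tf.metrics.mean_squared_error(labels, predictions)
    if 'RMSE' in indicators:
        ops['RMSE'] = tf.metrics.root_mean_squared_error(labels, predictions)
    if 'MAE' in indicators:
        ops['MAE'] = tf.metrics.mean_absolute_error(labels, predictions)
    if 'MAPE' in indicators:
        # No built-in MAPE metric in tf.metrics; approximate it with a streaming mean.
        ops['MAPE'] = tf.metrics.mean(
            tf.abs((labels - predictions) / (labels + 1e-7)) * 100.0)
    if training:
        # In training mode only expose the metric values as summaries.
        for name, (value, _) in ops.items():
            tf.summary.scalar(name, value)
    return ops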
def model_fn(features, labels, mode, params):
    linear_net = tf.feature_column.input_layer(
        features, params['linear_feature_columns'])
    embedding_net = tf.feature_column.input_layer(
        features, params['embedding_feature_columns'])

    with tf.name_scope('linear_net'):
        y_1d = tf.layers.dense(linear_net, 1, activation=tf.nn.relu)

    with tf.variable_scope('second-order'):
        fm_net = tf.reshape(embedding_net, [
            -1, len(params['embedding_feature_columns']), params['embedding_size']
        ])
        fm_net_sum_square = tf.square(tf.reduce_sum(fm_net, axis=1))
        fm_net_square_sum = tf.reduce_sum(tf.square(fm_net), axis=1)
        y_2d = 0.5 * tf.reduce_sum(tf.subtract(fm_net_sum_square, fm_net_square_sum),
                                   axis=1, keep_dims=True)

    logits = tf.concat([y_1d, y_2d], axis=-1)
    logits = tf.layers.dense(logits, units=1, activation=None)
    pred = tf.sigmoid(logits)
    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            estimator.export.PredictOutput(predictions)
    }
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       export_outputs=export_outputs)

    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                labels=tf.cast(labels, tf.float32)))
    eval_metric_ops = {
        "AUC": tf.metrics.auc(labels, pred),
        'Accuracy': tf.metrics.accuracy(labels, predictions=tf.round(pred))
    }
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       eval_metric_ops=eval_metric_ops)

    optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    if mode == estimator.ModeKeys.TRAIN:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       train_op=train_op)
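# A hedged example of how the linear_feature_columns / embedding_feature_columns params used
# above could be built with tf.feature_column; the feature names, bucket size and embedding
# dimension are invented for illustration.
def build_feature_columns_sketch(embedding_size=8):
    item_id = tf.feature_column.categorical_column_with_hash_bucket(
        'item_id', hash_bucket_size=10000)           # assumed feature
    linear_feature_columns = [
        tf.feature_column.numeric_column('price'),   # assumed feature
        tf.feature_column.indicator_column(item_id),
    ]
    # Every embedding column must share the same dimension, because the model_fn reshapes
    # the concatenated input_layer output to [-1, num_columns, embedding_size].
    embedding_feature_columns = [
        tf.feature_column.embedding_column(item_id, dimension=embedding_size),
    ]
    return linear_feature_columns, embedding_feature_columns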
def fm(features, labels, mode, params):
    # ---------- hyper-parameters ---------- #
    feature_size = params["feature_size"]
    field_size = params["field_size"]
    embed_size = params["embed_size"]
    loss_mode = params["loss_mode"]
    optimizer = params["optimizer"]
    learning_rate = params["learning_rate"]
    l2_reg_lambda = params["l2_reg_lambda"]

    # ---------- initial weights ----------- #
    # Numeric features and one-hot categorical features share a single embedding table.
    coe_b = tf.get_variable(name="coe_b", shape=[1],
                            initializer=tf.constant_initializer(0.0))
    coe_w = tf.get_variable(name="coe_w", shape=[feature_size],
                            initializer=tf.glorot_normal_initializer())
    coe_v = tf.get_variable(name="coe_v", shape=[feature_size, embed_size],
                            initializer=tf.glorot_normal_initializer())

    # ---------- reshape feature ----------- #
    feat_idx = features["feat_idx"]  # indices of non-zero features [batch_size, field_size, 1]
    feat_idx = tf.reshape(feat_idx, shape=[-1, field_size])  # [Batch, Field]
    feat_val = features["feat_val"]  # values of non-zero features [batch_size, field_size, 1]
    feat_val = tf.reshape(feat_val, shape=[-1, field_size])  # [Batch, Field]

    # ------------- define f(x) ------------ #
    # FM: y = b + sum<wi,xi> + sum(<vi,vj>xi*xj)
    with tf.variable_scope("First-Order"):
        feat_wgt = tf.nn.embedding_lookup(coe_w, feat_idx)  # [Batch, Field]
        y_w = tf.reduce_sum(tf.multiply(feat_wgt, feat_val), 1)  # [Batch]

    with tf.variable_scope("Second-Order"):
        embeddings = tf.nn.embedding_lookup(coe_v, feat_idx)  # [Batch, Field, K]
        feat_vals = tf.reshape(feat_val, shape=[-1, field_size, 1])  # [Batch, Field, 1]
        embeddings = tf.multiply(embeddings, feat_vals)  # [Batch, Field, K]
        sum_square = tf.square(tf.reduce_sum(embeddings, 1))  # [Batch, K]
        square_sum = tf.reduce_sum(tf.square(embeddings), 1)  # [Batch, K]
        y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1)  # [Batch]

    with tf.variable_scope("FM-Out"):
        y_b = coe_b * tf.ones_like(y_w, dtype=tf.float32)  # [Batch]
        y_hat = y_b + y_w + y_v  # [Batch]
        y_pred = tf.nn.sigmoid(y_hat)  # [Batch]

    # ----- mode: predict/evaluate/train ----- #
    # predict: no loss/metrics are computed; evaluate: no gradient descent or parameter update
    # Provide an estimator spec for 'ModeKeys.PREDICT'
    predictions = {"prob": y_pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            estimator.export.PredictOutput(predictions)
    }
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       export_outputs=export_outputs)

    # Provide an estimator spec for 'ModeKeys.EVAL'
    if loss_mode == "log_loss":
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=y_hat)) + \
            l2_reg_lambda * tf.nn.l2_loss(coe_w) + l2_reg_lambda * tf.nn.l2_loss(coe_v)
    else:
        loss = tf.reduce_mean(tf.square(labels - y_pred))
    eval_metric_ops = {"auc": tf.metrics.auc(labels, y_pred)}
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       eval_metric_ops=eval_metric_ops)

    # Provide an estimator spec for 'ModeKeys.TRAIN'
    if optimizer == "Adam":
        opt_mode = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                          beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif optimizer == "Adagrad":
        opt_mode = tf.train.AdagradOptimizer(learning_rate=learning_rate,
                                             initial_accumulator_value=1e-8)
    elif optimizer == "Momentum":
        opt_mode = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif optimizer == "Ftrl":
        opt_mode = tf.train.FtrlOptimizer(learning_rate)
    else:
        opt_mode = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = opt_mode.minimize(loss, global_step=tf.train.get_global_step())
    if mode == estimator.ModeKeys.TRAIN:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       train_op=train_op)
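# A hedged sketch of an input_fn producing the feat_idx / feat_val tensors that the FM
# model_fn above expects, assuming libsvm-style lines of the form "label idx:val idx:val ...".
# The file format, padding to field_size and parsing details are assumptions.
def libsvm_input_fn_sketch(filenames, field_size, batch_size=256, num_epochs=1):
    def _parse(line):
        columns = tf.string_split([line], ' ')
        label = tf.string_to_number(columns.values[0], out_type=tf.float32)
        splits = tf.string_split(columns.values[1:], ':')
        id_vals = tf.reshape(splits.values, [field_size, 2])
        feat_idx, feat_val = tf.split(id_vals, num_or_size_splits=2, axis=1)
        feat_idx = tf.string_to_number(feat_idx, out_type=tf.int32)
        feat_val = tf.string_to_number(feat_val, out_type=tf.float32)
        return {'feat_idx': feat_idx, 'feat_val': feat_val}, label

    dataset = tf.data.TextLineDataset(filenames)
    dataset = dataset.map(_parse, num_parallel_calls=4)
    dataset = dataset.repeat(num_epochs).batch(batch_size).prefetch(1)
    return dataset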
def nfm(features, labels, mode, params):
    # ---------- hyper-parameters ---------- #
    feature_size = params["feature_size"]
    field_size = params["field_size"]
    embed_size = params["embed_size"]
    loss_mode = params["loss_mode"]
    optimizer = params["optimizer"]
    learning_rate = params["learning_rate"]
    l2_reg_lambda = params["l2_reg_lambda"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))

    # ---------- initial weights ----------- #
    # Numeric features and one-hot categorical features share a single embedding table.
    coe_b = tf.get_variable(name="coe_b", shape=[1],
                            initializer=tf.constant_initializer(0.0))
    coe_w = tf.get_variable(name="coe_w", shape=[feature_size],
                            initializer=tf.glorot_normal_initializer())
    coe_v = tf.get_variable(name="coe_v", shape=[feature_size, embed_size],
                            initializer=tf.glorot_normal_initializer())

    # ---------- reshape feature ----------- #
    feat_idx = features["feat_idx"]  # indices of non-zero features [batch_size, field_size, 1]
    feat_idx = tf.reshape(feat_idx, shape=[-1, field_size])  # [Batch, Field]
    feat_val = features["feat_val"]  # values of non-zero features [batch_size, field_size, 1]
    feat_val = tf.reshape(feat_val, shape=[-1, field_size])  # [Batch, Field]

    # ------------- define f(x) ------------ #
    with tf.variable_scope("First-Order"):
        feat_wgt = tf.nn.embedding_lookup(coe_w, feat_idx)  # [Batch, Field]
        y_w = tf.reduce_sum(tf.multiply(feat_wgt, feat_val), 1)  # [Batch]

    with tf.variable_scope("Bi-Interaction-Layer"):
        embeddings = tf.nn.embedding_lookup(coe_v, feat_idx)  # [Batch, Field, K]
        feat_vals = tf.reshape(feat_val, shape=[-1, field_size, 1])  # [Batch, Field, 1]
        embeddings = tf.multiply(embeddings, feat_vals)  # [Batch, Field, K]
        sum_square = tf.square(tf.reduce_sum(embeddings, 1))  # [Batch, K]
        square_sum = tf.reduce_sum(tf.square(embeddings), 1)  # [Batch, K]
        bi_out = 0.5 * (tf.subtract(sum_square, square_sum))  # [Batch, K]

    with tf.variable_scope("Deep-Layer"):
        deep_inputs = bi_out
        # hidden layer
        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs,
                num_outputs=layers[i],
                scope="mlp_%d" % i,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg_lambda))
            if mode == estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])
        # output layer
        y_d = tf.contrib.layers.fully_connected(
            inputs=deep_inputs,
            num_outputs=1,
            activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg_lambda),
            scope='deep_out')

    with tf.variable_scope("NFM-Out"):
        y_deep = tf.reshape(y_d, shape=[-1])
        y_bias = coe_b * tf.ones_like(y_w, dtype=tf.float32)  # [Batch]
        y_hat = y_bias + y_w + y_deep  # [Batch]
        y_pred = tf.nn.sigmoid(y_hat)  # [Batch]

    # ----- mode: predict/evaluate/train ----- #
    # predict: no loss/metrics are computed; evaluate: no gradient descent or parameter update
    # Provide an estimator spec for 'ModeKeys.PREDICT'
    predictions = {"prob": y_pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            estimator.export.PredictOutput(predictions)
    }
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       export_outputs=export_outputs)

    # Provide an estimator spec for 'ModeKeys.EVAL'
    if loss_mode == "log_loss":
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=y_hat)) + \
            l2_reg_lambda * tf.nn.l2_loss(coe_w) + l2_reg_lambda * tf.nn.l2_loss(coe_v)
    else:
        loss = tf.reduce_mean(tf.square(labels - y_pred))
    eval_metric_ops = {"auc": tf.metrics.auc(labels, y_pred)}
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       eval_metric_ops=eval_metric_ops)

    # Provide an estimator spec for 'ModeKeys.TRAIN'
    if optimizer == "Adam":
        opt_mode = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                          beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif optimizer == "Adagrad":
        opt_mode = tf.train.AdagradOptimizer(learning_rate=learning_rate,
                                             initial_accumulator_value=1e-8)
    elif optimizer == "Momentum":
        opt_mode = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif optimizer == "Ftrl":
        opt_mode = tf.train.FtrlOptimizer(learning_rate)
    else:
        opt_mode = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = opt_mode.minimize(loss, global_step=tf.train.get_global_step())
    if mode == estimator.ModeKeys.TRAIN:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       train_op=train_op)
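# A quick numpy check (not part of the graph) that the sum-square/square-sum identity used in
# the Bi-Interaction layer above equals the explicit pairwise interaction sum:
# 0.5 * ((sum_i v_i)^2 - sum_i v_i^2) = sum_{i<j} v_i * v_j, element-wise per embedding dim.
import numpy as np

def bi_interaction_check(embeddings):
    # embeddings: [field_size, K]
    trick = 0.5 * (np.square(embeddings.sum(axis=0)) - np.square(embeddings).sum(axis=0))
    explicit = sum(embeddings[i] * embeddings[j]
                   for i in range(len(embeddings))
                   for j in range(i + 1, len(embeddings)))
    assert np.allclose(trick, explicit)
    return trick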
def fpnn(features, labels, mode, params):
    # ---------- hyper-parameters ---------- #
    algorithm = params["algorithm"]
    feature_size = params["feature_size"]
    field_size = params["field_size"]
    embed_size = params["embed_size"]
    loss_mode = params["loss_mode"]
    optimizer = params["optimizer"]
    learning_rate = params["learning_rate"]
    l2_reg_lambda = params["l2_reg_lambda"]
    layers = list(map(int, params["deep_layers"].split(',')))  # the first hidden layer has D1 units
    dropout = list(map(float, params["dropout"].split(',')))

    # ---------- initial weights ----------- #
    # Numeric features and one-hot categorical features share a single embedding table.
    coe_b = tf.get_variable(name="coe_b", shape=[1],
                            initializer=tf.constant_initializer(0.0))
    coe_w = tf.get_variable(name="coe_w", shape=[feature_size],
                            initializer=tf.glorot_normal_initializer())
    coe_v = tf.get_variable(name="coe_v", shape=[feature_size, embed_size],
                            initializer=tf.glorot_normal_initializer())
    coe_line = tf.get_variable(name="coe_line", shape=[layers[0], field_size, embed_size],
                               initializer=tf.glorot_normal_initializer())
    coe_ipnn = tf.get_variable(name="coe_ipnn", shape=[layers[0], field_size],
                               initializer=tf.glorot_normal_initializer())
    coe_opnn = tf.get_variable(name="coe_opnn", shape=[layers[0], embed_size, embed_size],
                               initializer=tf.glorot_normal_initializer())

    # ---------- reshape feature ----------- #
    feat_idx = features["feat_idx"]  # indices of non-zero features [batch_size, field_size, 1]
    feat_idx = tf.reshape(feat_idx, shape=[-1, field_size])  # [Batch, Field]
    feat_val = features["feat_val"]  # values of non-zero features [batch_size, field_size, 1]
    feat_val = tf.reshape(feat_val, shape=[-1, field_size])  # [Batch, Field]

    # ------------- define f(x) ------------ #
    with tf.variable_scope("Linear-Part"):
        feat_wgt = tf.nn.embedding_lookup(coe_w, feat_idx)  # [Batch, Field]
        y_linear = tf.reduce_sum(tf.multiply(feat_wgt, feat_val), 1)  # [Batch]

    with tf.variable_scope("Embed-Layer"):
        embeddings = tf.nn.embedding_lookup(coe_v, feat_idx)  # [Batch, Field, K]
        feat_vals = tf.reshape(feat_val, shape=[-1, field_size, 1])  # [Batch, Field, 1]
        embeddings = tf.multiply(embeddings, feat_vals)  # [Batch, Field, K]

    with tf.variable_scope("Product-Layer"):
        if algorithm == "FNN":
            feat_vec = tf.reshape(embeddings, shape=[-1, field_size * embed_size])
            feat_bias = coe_b * tf.reshape(
                tf.ones_like(y_linear, dtype=tf.float32), shape=[-1, 1])
            deep_inputs = tf.concat([feat_wgt, feat_vec, feat_bias], 1)  # [Batch, Field*(K+1)+1]
        elif algorithm == "IPNN":
            # linear signal
            z = tf.reshape(embeddings, shape=[-1, field_size * embed_size])  # [Batch, Field*K]
            wz = tf.reshape(coe_line, shape=[-1, field_size * embed_size])  # [D1, Field*K]
            lz = tf.matmul(z, tf.transpose(wz))  # [Batch, D1]
            # quadratic signal
            row_i = []
            col_j = []
            for i in range(field_size - 1):
                for j in range(i + 1, field_size):
                    row_i.append(i)
                    col_j.append(j)
            fi = tf.gather(embeddings, row_i, axis=1)  # gather slices along the field axis [Batch, num_pairs, K]
            fj = tf.gather(embeddings, col_j, axis=1)  # gather slices along the field axis [Batch, num_pairs, K]
            # p_ij = g(fi, fj) = <fi, fj>, inner product of the latent vectors of features i and j
            p = tf.reduce_sum(tf.multiply(fi, fj), 2)  # the p matrix flattened to a vector [Batch, num_pairs]
            wpi = tf.gather(coe_ipnn, row_i, axis=1)  # gather slices along the field axis [D1, num_pairs]
            wpj = tf.gather(coe_ipnn, col_j, axis=1)  # gather slices along the field axis [D1, num_pairs]
            wp = tf.multiply(wpi, wpj)  # D1 W matrices stacked row-wise (each row is one W) [D1, num_pairs]
            lp = tf.matmul(p, tf.transpose(wp))  # [Batch, D1]
            lb = coe_b * tf.reshape(tf.ones_like(y_linear, dtype=tf.float32), shape=[-1, 1])
            deep_inputs = lz + lp + lb  # [Batch, D1]
        elif algorithm == "OPNN":
            # linear signal
            z = tf.reshape(embeddings, shape=[-1, field_size * embed_size])  # [Batch, Field*K]
            wz = tf.reshape(coe_line, shape=[-1, field_size * embed_size])  # [D1, Field*K]
            lz = tf.matmul(z, tf.transpose(wz))  # [Batch, D1]
            # quadratic signal
            f_sigma = tf.reduce_sum(embeddings, axis=1)  # [Batch, K]
            p = tf.matmul(tf.reshape(f_sigma, shape=[-1, embed_size, 1]),
                          tf.reshape(f_sigma, shape=[-1, 1, embed_size]))  # [Batch, K, K]
            p = tf.reshape(p, shape=[-1, embed_size * embed_size])  # [Batch, K*K]
            wp = tf.reshape(coe_opnn, shape=[-1, embed_size * embed_size])  # [D1, K*K]
            lp = tf.matmul(p, tf.transpose(wp))  # [Batch, D1]
            lb = coe_b * tf.reshape(tf.ones_like(y_linear, dtype=tf.float32), shape=[-1, 1])
            deep_inputs = lz + lp + lb  # [Batch, D1]

    with tf.variable_scope("Deep-Layer"):
        # hidden layer
        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs,
                num_outputs=layers[i],
                scope="mlp_%d" % i,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg_lambda))
            if mode == estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])
        # output layer
        y_d = tf.contrib.layers.fully_connected(
            inputs=deep_inputs,
            num_outputs=1,
            activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg_lambda),
            scope='deep_out')

    with tf.variable_scope("FPNN-Out"):
        y_hat = tf.reshape(y_d, shape=[-1])
        y_pred = tf.nn.sigmoid(y_hat)

    # ----- mode: predict/evaluate/train ----- #
    # predict: no loss/metrics are computed; evaluate: no gradient descent or parameter update
    # Provide an estimator spec for 'ModeKeys.PREDICT'
    predictions = {"prob": y_pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            estimator.export.PredictOutput(predictions)
    }
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       export_outputs=export_outputs)

    # Provide an estimator spec for 'ModeKeys.EVAL'
    if loss_mode == "log_loss":
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=y_hat)) + \
            l2_reg_lambda * tf.nn.l2_loss(coe_w) + l2_reg_lambda * tf.nn.l2_loss(coe_v)
    else:
        loss = tf.reduce_mean(tf.square(labels - y_pred))
    eval_metric_ops = {"auc": tf.metrics.auc(labels, y_pred)}
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       eval_metric_ops=eval_metric_ops)

    # Provide an estimator spec for 'ModeKeys.TRAIN'
    if optimizer == "Adam":
        opt_mode = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                          beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif optimizer == "Adagrad":
        opt_mode = tf.train.AdagradOptimizer(learning_rate=learning_rate,
                                             initial_accumulator_value=1e-8)
    elif optimizer == "Momentum":
        opt_mode = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif optimizer == "Ftrl":
        opt_mode = tf.train.FtrlOptimizer(learning_rate)
    else:
        opt_mode = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = opt_mode.minimize(loss, global_step=tf.train.get_global_step())
    if mode == estimator.ModeKeys.TRAIN:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       train_op=train_op)
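# The IPNN branch above builds the pair indices (row_i, col_j) with two nested loops. An
# equivalent construction with itertools.combinations, shown only for clarity, makes the
# pair count explicit: num_pairs = field_size * (field_size - 1) // 2.
import itertools

def build_pair_indices(field_size):
    pairs = list(itertools.combinations(range(field_size), 2))  # all (i, j) with i < j
    row_i = [i for i, _ in pairs]
    col_j = [j for _, j in pairs]
    return row_i, col_j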
def model_fn(features, labels, mode, params):
    layers = list(map(int, params["deep_layers"].split(',')))
    # Not following the paper using dense feature here,
    # due to numerical stability.
    linear_net = tf.feature_column.input_layer(
        features, params['linear_feature_columns'])
    embedding_net = tf.feature_column.input_layer(
        features, params['embedding_feature_columns'])
    x0 = embedding_net
    cross_dim = x0.shape[1]

    # with tf.name_scope('linear_net'):
    #     linear_y = tf.layers.dense(linear_net, 1, activation=tf.nn.relu)

    with tf.variable_scope('cross_layers'):
        xl = x0
        for i in range(FLAGS.cross_layers):
            # wl = tf.reshape(cross_weight[i], shape=[-1, 1])  # (dim * 1)
            # xlw = tf.matmul(xl, wl)  # (? * 1)
            # xl = x0 * xlw + xl + cross_bias[i]  # (? * dim)
            with tf.variable_scope('cross_{}'.format(i)):
                w = tf.get_variable("weight", [cross_dim],
                                    initializer=tf.glorot_normal_initializer())
                b = tf.get_variable("bias", [cross_dim],
                                    initializer=tf.glorot_normal_initializer())
                xw = tf.tensordot(tf.reshape(xl, [-1, 1, cross_dim]), w, 1)
                xl = xw * x0 + xl + b

    with tf.variable_scope('deep_layers'):
        dnn_net = x0
        for i in layers:
            dnn_net = tf.layers.dense(dnn_net, i, activation=tf.nn.relu)
            dnn_net = tf.layers.batch_normalization(
                dnn_net, training=(mode == estimator.ModeKeys.TRAIN))
            dnn_net = tf.layers.dropout(
                dnn_net,
                rate=params['dropout'],
                training=(mode == estimator.ModeKeys.TRAIN))

    logits = tf.concat([dnn_net, xl], axis=-1)
    logits = tf.layers.dense(logits, units=1, activation=None)
    pred = tf.sigmoid(logits)
    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            estimator.export.PredictOutput(predictions)
    }
    if mode == estimator.ModeKeys.PREDICT:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       export_outputs=export_outputs)

    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                labels=tf.cast(labels, tf.float32)))
    eval_metric_ops = {
        "AUC": tf.metrics.auc(labels, pred),
        'Accuracy': tf.metrics.accuracy(labels, predictions=tf.round(pred))
    }
    if mode == estimator.ModeKeys.EVAL:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       eval_metric_ops=eval_metric_ops)

    optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    if mode == estimator.ModeKeys.TRAIN:
        return estimator.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       loss=loss,
                                       train_op=train_op)
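# A small numpy illustration (shapes only, not part of the graph) of the cross-layer
# recurrence implemented above: x_{l+1} = x_0 * (x_l . w_l) + x_l + b_l, where x_l . w_l is a
# scalar per example, so each layer adds just one weight vector and one bias of size dim.
import numpy as np

def cross_layer_check(x0, num_layers, seed=0):
    rng = np.random.RandomState(seed)
    dim = x0.shape[1]
    xl = x0
    for _ in range(num_layers):
        w = rng.normal(size=dim)
        b = rng.normal(size=dim)
        xlw = xl @ w                      # [batch], one scalar per example
        xl = x0 * xlw[:, None] + xl + b   # [batch, dim]
    return xl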
def model_fn(features, labels, mode, params): """ Build model for estimator. @param features: @param labels: @param mode: @param params: @return: """ layers = list(map(int, params["deep_layers"].split(','))) embedding_feature_columns = params['embedding_feature_columns'] embedding_features = feature_column.input_layer(features, embedding_feature_columns) linear_feature_columns = params['linear_feature_columns'] linear_features = feature_column.input_layer(features, linear_feature_columns) with tf.variable_scope('first-order'): y_1d = tf.layers.dense(linear_features, 1, activation=tf.nn.relu) with tf.variable_scope('second-order'): fm_net = tf.reshape(embedding_features, [-1, len(params['embedding_feature_columns']), params['embedding_size']]) fm_net_sum_square = tf.square(tf.reduce_sum(fm_net, axis=1)) fm_net_square_sum = tf.reduce_sum(tf.square(fm_net), axis=1) y_2d = 0.5 * tf.reduce_sum(tf.subtract(fm_net_sum_square, fm_net_square_sum), axis=1, keep_dims=True) with tf.variable_scope('dnn'): dnn_net = tf.reshape(embedding_features, shape=[-1, len(params['embedding_feature_columns']) * params['embedding_size']]) for i in layers: dnn_net = tf.layers.dense(dnn_net, i, activation=tf.nn.relu) dnn_net = tf.layers.batch_normalization(dnn_net, training=(mode == estimator.ModeKeys.TRAIN)) dnn_net = tf.layers.dropout(dnn_net, rate=params['dropout'], training=(mode == estimator.ModeKeys.TRAIN)) y_dnn = tf.layers.dense(dnn_net, 1, activation=tf.nn.relu) logits = tf.concat([y_1d, y_2d, y_dnn], axis=-1) logits = tf.layers.dense(logits, units=1) logits = tf.reshape(logits, (-1,)) pred = tf.sigmoid(logits) predictions = {"prob": pred} export_outputs = { tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: estimator.export.PredictOutput( predictions)} if mode == estimator.ModeKeys.PREDICT: return estimator.EstimatorSpec( mode=mode, predictions=predictions, export_outputs=export_outputs) loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( logits=logits, labels=tf.cast(labels, tf.float32)) ) eval_metric_ops = { "AUC": tf.metrics.auc(labels, pred), 'Accuracy': tf.metrics.accuracy(labels, predictions=tf.round(pred)) } if mode == estimator.ModeKeys.EVAL: return estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops) optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate']) train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) if mode == estimator.ModeKeys.TRAIN: return estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, train_op=train_op)