def net(self, inputs, is_infer=False):
    init_value_ = 0.1
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False

    # ------------------------- network input --------------------------
    raw_feat_idx = self._sparse_data_var[1]
    raw_feat_value = self._dense_data_var[0]
    self.label = self._sparse_data_var[0]

    feat_idx = raw_feat_idx
    feat_value = paddle.reshape(raw_feat_value,
                                [-1, self.num_field])  # None * num_field

    LR_model = LRLayer(self.sparse_feature_number, init_value_, self.reg,
                       self.num_field)
    self.predict = LR_model(feat_idx, feat_value)

    cost = paddle.nn.functional.log_loss(
        input=self.predict, label=paddle.cast(self.label, "float32"))
    avg_cost = paddle.sum(x=cost)
    self._cost = avg_cost

    predict_2d = paddle.concat(x=[1 - self.predict, self.predict], axis=1)
    label_int = paddle.cast(self.label, 'int64')
    auc_var, batch_auc_var, _ = paddle.fluid.layers.auc(
        input=predict_2d, label=label_int, slide_steps=0)
    self._metrics["AUC"] = auc_var
    self._metrics["BATCH_AUC"] = batch_auc_var
    if is_infer:
        self._infer_results["AUC"] = auc_var
def _init_hyper_parameters(self):
    self.is_distributed = True if envs.get_trainer(
    ) == "CtrTrainer" else False
    self.sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number")
    self.sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim")
    self.learning_rate = envs.get_global_env(
        "hyper_parameters.optimizer.learning_rate")
def net(self, inputs, is_infer=False):
    init_value_ = 0.1
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False

    # ------------------------- network input --------------------------
    raw_feat_idx = self._sparse_data_var[1]
    raw_feat_value = self._dense_data_var[0]
    self.label = self._sparse_data_var[0]

    feat_idx = raw_feat_idx
    feat_value = fluid.layers.reshape(
        raw_feat_value, [-1, self.num_field])  # None * num_field

    first_weights_re = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        is_distributed=is_distributed,
        dtype='float32',
        size=[self.sparse_feature_number + 1, 1],
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_),
            regularizer=fluid.regularizer.L1DecayRegularizer(self.reg)))
    first_weights = fluid.layers.reshape(
        first_weights_re, shape=[-1, self.num_field])  # None * num_field
    y_first_order = fluid.layers.reduce_sum(
        first_weights * feat_value, 1, keep_dim=True)
    b_linear = fluid.layers.create_parameter(
        shape=[1],
        dtype='float32',
        default_initializer=fluid.initializer.ConstantInitializer(value=0))

    self.predict = fluid.layers.sigmoid(y_first_order + b_linear)

    cost = fluid.layers.log_loss(
        input=self.predict, label=fluid.layers.cast(self.label, "float32"))
    avg_cost = fluid.layers.reduce_sum(cost)
    self._cost = avg_cost

    predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
    label_int = fluid.layers.cast(self.label, 'int64')
    auc_var, batch_auc_var, _ = fluid.layers.auc(
        input=predict_2d, label=label_int, slide_steps=0)
    self._metrics["AUC"] = auc_var
    self._metrics["BATCH_AUC"] = batch_auc_var
    if is_infer:
        self._infer_results["AUC"] = auc_var
def _init_hyper_parameters(self):
    self.is_distributed = True if envs.get_trainer(
    ) == "CtrTrainer" else False
    self.sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number", None)
    self.sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim", None)
    self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse",
                                         False)
    self.reg = envs.get_global_env("hyper_parameters.reg", 1e-4)
    self.num_field = envs.get_global_env("hyper_parameters.num_field",
                                         None)
def net(self):
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
    sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number", None, self._namespace)
    sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim", None, self._namespace)

    def embedding_layer(input):
        emb = fluid.layers.embedding(
            input=input,
            is_sparse=True,
            is_distributed=is_distributed,
            size=[sparse_feature_number, sparse_feature_dim],
            param_attr=fluid.ParamAttr(
                name="SparseFeatFactors",
                initializer=fluid.initializer.Uniform()), )
        emb_sum = fluid.layers.sequence_pool(input=emb, pool_type='sum')
        return emb_sum

    def fc(input, output_size):
        output = fluid.layers.fc(
            input=input,
            size=output_size,
            act='relu',
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Normal(
                    scale=1.0 / math.sqrt(input.shape[1]))))
        return output

    sparse_embed_seq = list(map(embedding_layer, self.sparse_inputs))
    concated = fluid.layers.concat(
        sparse_embed_seq + [self.dense_input], axis=1)

    fcs = [concated]
    hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None,
                                        self._namespace)
    for size in hidden_layers:
        fcs.append(fc(fcs[-1], size))

    predict = fluid.layers.fc(
        input=fcs[-1],
        size=2,
        act="softmax",
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Normal(
                scale=1 / math.sqrt(fcs[-1].shape[1]))))

    self.predict = predict
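# Hedged aside (not part of the model above): embedding_layer sum-pools a
# variable-length sparse feature into a single fixed-size vector before the
# MLP. A minimal NumPy sketch of that pooling; the table and ids are
# illustrative, not the trained parameters:
import numpy as np

def sum_pool_sketch(emb_table, ids):
    # emb_table: [vocab_size, emb_dim]; ids: feature ids of one sample
    return emb_table[ids].sum(axis=0)  # [emb_dim], order-invariant

# e.g. sum_pool_sketch(np.random.rand(1000, 9), [3, 17, 3])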
def _build_trainer(yaml_path):
    print(envs.pretty_print_envs(envs.get_global_envs()))
    train_mode = envs.get_trainer()
    trainer_abs = trainers.get(train_mode, None)

    if trainer_abs is None:
        if not os.path.isfile(train_mode):
            raise IOError(
                "trainer {} can not be recognized".format(train_mode))
        trainer_abs = train_mode
        train_mode = "UserDefineTrainer"

    trainer_class = envs.lazy_instance_by_fliename(trainer_abs, train_mode)
    trainer = trainer_class(yaml_path)
    return trainer
def _init_hyper_parameters(self):
    self.is_distributed = True if envs.get_trainer(
    ) == "CtrTrainer" else False
    self.sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number")
    self.sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim")
    self.neg_num = envs.get_global_env("hyper_parameters.neg_num")
    self.with_shuffle_batch = envs.get_global_env(
        "hyper_parameters.with_shuffle_batch")
    self.learning_rate = envs.get_global_env(
        "hyper_parameters.optimizer.learning_rate")
    self.decay_steps = envs.get_global_env(
        "hyper_parameters.optimizer.decay_steps")
    self.decay_rate = envs.get_global_env(
        "hyper_parameters.optimizer.decay_rate")
def _init_hyper_parameters(self):
    self.is_distributed = True if envs.get_trainer(
    ) == "CtrTrainer" else False
    self.sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number", None)
    self.sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim", None)
    self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse",
                                         False)
    self.reg = envs.get_global_env("hyper_parameters.reg", 1e-4)
    self.num_field = envs.get_global_env("hyper_parameters.num_field",
                                         None)
    self.hidden1_attention_size = envs.get_global_env(
        "hyper_parameters.hidden1_attention_size", 16)
    self.attention_act = envs.get_global_env("hyper_parameters.act",
                                             "relu")
def _init_hyper_parameters(self):
    self.is_distributed = True if envs.get_trainer(
    ) == "CtrTrainer" else False
    self.sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number", None)
    self.sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim", None)
    self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse",
                                         False)
    self.use_batchnorm = envs.get_global_env(
        "hyper_parameters.use_batchnorm", False)
    self.use_dropout = envs.get_global_env("hyper_parameters.use_dropout",
                                           False)
    self.dropout_prob = envs.get_global_env(
        "hyper_parameters.dropout_prob", None)
    self.layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes",
                                           None)
    self.loss_type = envs.get_global_env("hyper_parameters.loss_type",
                                         'logloss')
    self.reg = envs.get_global_env("hyper_parameters.reg", 1e-4)
    self.num_field = envs.get_global_env("hyper_parameters.num_field",
                                         None)
    self.act = envs.get_global_env("hyper_parameters.act", None)
def tdm_net(self, input):
    """
    Main flow of the TDM training network.
    """
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False

    input_emb = input[0]
    item_label = input[1]

    # Negative sampling on the given tree, driven by the positive item samples.
    # sample_nodes are the sampled node_ids, containing both positive and
    #   negative samples.
    # sample_label are the positive/negative labels of the sampled node_ids.
    # sample_mask marks the padded entries added to keep tensor shapes
    #   consistent; a value of 0 means the node_id is a padded, virtual node.
    if self.check_version():
        with fluid.device_guard("cpu"):
            sample_nodes, sample_label, sample_mask = fluid.contrib.layers.tdm_sampler(
                x=item_label,
                neg_samples_num_list=self.neg_sampling_list,
                layer_node_num_list=self.layer_node_num_list,
                leaf_node_num=self.leaf_node_nums,
                tree_travel_attr=fluid.ParamAttr(name="TDM_Tree_Travel"),
                tree_layer_attr=fluid.ParamAttr(name="TDM_Tree_Layer"),
                output_positive=self.output_positive,
                output_list=True,
                seed=0,
                tree_dtype='int64',
                dtype='int64')
    else:
        sample_nodes, sample_label, sample_mask = fluid.contrib.layers.tdm_sampler(
            x=item_label,
            neg_samples_num_list=self.neg_sampling_list,
            layer_node_num_list=self.layer_node_num_list,
            leaf_node_num=self.leaf_node_nums,
            tree_travel_attr=fluid.ParamAttr(name="TDM_Tree_Travel"),
            tree_layer_attr=fluid.ParamAttr(name="TDM_Tree_Layer"),
            output_positive=self.output_positive,
            output_list=True,
            seed=0,
            tree_dtype='int64',
            dtype='int64')

    sample_nodes = [
        fluid.layers.reshape(sample_nodes[i], [-1, 1])
        for i in range(self.max_layers)
    ]

    # Look up the embedding of every sampled node.
    sample_nodes_emb = [
        fluid.layers.embedding(
            input=sample_nodes[i],
            is_sparse=True,
            size=[self.node_nums, self.node_emb_size],
            param_attr=fluid.ParamAttr(name="TDM_Tree_Emb"))
        for i in range(self.max_layers)
    ]

    # Reshape for the subsequent layer-wise classifier training.
    sample_nodes_emb = [
        fluid.layers.reshape(sample_nodes_emb[i], [
            -1, self.neg_sampling_list[i] + self.output_positive,
            self.node_emb_size
        ]) for i in range(self.max_layers)
    ]

    # Transform input_emb so that its dimension matches node_emb.
    input_trans_emb = self.input_trans_layer(input_emb)

    # Main body of the classifier: train a separate classifier per tree layer.
    layer_classifier_res = self.classifier_layer(input_trans_emb,
                                                 sample_nodes_emb)

    # Final discriminative FC: score the node classification results of all
    # layers together under one criterion. The tree is very likely unbalanced
    # and some items do not sit on the last layer, so this mechanism
    # guarantees every item has a chance to be recalled.
    tdm_fc = fluid.layers.fc(
        input=layer_classifier_res,
        size=2,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(name="tdm.cls_fc.weight"),
        bias_attr=fluid.ParamAttr(name="tdm.cls_fc.bias"))

    # Flatten the logits so the loss of the whole network is computed at once.
    tdm_fc_re = fluid.layers.reshape(tdm_fc, [-1, 2])

    # To weight the loss of each layer differently, skip the concat here and
    # compute a per-layer loss instead, multiplying in the desired weights.
    sample_label = fluid.layers.concat(sample_label, axis=1)
    labels_reshape = fluid.layers.reshape(sample_label, [-1, 1])
    labels_reshape.stop_gradient = True

    # Compute the overall loss and obtain the softmax output.
    cost, softmax_prob = fluid.layers.softmax_with_cross_entropy(
        logits=tdm_fc_re, label=labels_reshape, return_softmax=True)

    # Filter out the loss of virtual (padded) nodes through the mask.
    sample_mask = fluid.layers.concat(sample_mask, axis=1)
    mask_reshape = fluid.layers.reshape(sample_mask, [-1, 1])
    mask_index = fluid.layers.where(mask_reshape != 0)
    mask_index.stop_gradient = True

    self.mask_cost = fluid.layers.gather_nd(cost, mask_index)
    softmax_prob = fluid.layers.unsqueeze(input=softmax_prob, axes=[1])
    self.mask_prob = fluid.layers.gather_nd(softmax_prob, mask_index)
    self.mask_label = fluid.layers.gather_nd(labels_reshape, mask_index)

    self._predict = self.mask_prob
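# Hedged aside: the where + gather_nd pair in tdm_net drops rows that belong
# to padded (virtual) nodes before the loss is aggregated. A NumPy equivalent
# of that filtering, for illustration only:
import numpy as np

def mask_filter_sketch(cost, mask):
    # cost: [n, 1] per-sample losses; mask: [n, 1], 0 marks padded rows
    keep = np.nonzero(mask.reshape(-1) != 0)[0]
    return cost[keep]  # only the losses of real nodes survive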
def net(self):
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
    neg_num = int(
        envs.get_global_env("hyper_parameters.neg_num", None,
                            self._namespace))
    sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number", None, self._namespace)
    sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim", None, self._namespace)
    with_shuffle_batch = bool(
        int(
            envs.get_global_env("hyper_parameters.with_shuffle_batch",
                                None, self._namespace)))

    def embedding_layer(input,
                        table_name,
                        emb_dim,
                        initializer_instance=None,
                        squeeze=False):
        emb = fluid.embedding(
            input=input,
            is_sparse=True,
            is_distributed=is_distributed,
            size=[sparse_feature_number, emb_dim],
            param_attr=fluid.ParamAttr(
                name=table_name, initializer=initializer_instance), )
        if squeeze:
            return fluid.layers.squeeze(input=emb, axes=[1])
        else:
            return emb

    init_width = 0.5 / sparse_feature_dim
    emb_initializer = fluid.initializer.Uniform(-init_width, init_width)
    emb_w_initializer = fluid.initializer.Constant(value=0.0)

    input_emb = embedding_layer(self.input_word, "emb", sparse_feature_dim,
                                emb_initializer, True)
    true_emb_w = embedding_layer(self.true_word, "emb_w",
                                 sparse_feature_dim, emb_w_initializer,
                                 True)
    true_emb_b = embedding_layer(self.true_word, "emb_b", 1,
                                 emb_w_initializer, True)

    if with_shuffle_batch:
        neg_emb_w_list = []
        for i in range(neg_num):
            neg_emb_w_list.append(
                fluid.contrib.layers.shuffle_batch(
                    true_emb_w))  # shuffle true_word
        neg_emb_w_concat = fluid.layers.concat(neg_emb_w_list, axis=0)
        neg_emb_w = fluid.layers.reshape(
            neg_emb_w_concat, shape=[-1, neg_num, sparse_feature_dim])

        neg_emb_b_list = []
        for i in range(neg_num):
            neg_emb_b_list.append(
                fluid.contrib.layers.shuffle_batch(
                    true_emb_b))  # shuffle true_word
        neg_emb_b = fluid.layers.concat(neg_emb_b_list, axis=0)
        neg_emb_b_vec = fluid.layers.reshape(neg_emb_b,
                                             shape=[-1, neg_num])
    else:
        neg_emb_w = embedding_layer(self.neg_word, "emb_w",
                                    sparse_feature_dim, emb_w_initializer)
        neg_emb_b = embedding_layer(self.neg_word, "emb_b", 1,
                                    emb_w_initializer)
        neg_emb_b_vec = fluid.layers.reshape(neg_emb_b,
                                             shape=[-1, neg_num])

    true_logits = fluid.layers.elementwise_add(
        fluid.layers.reduce_sum(
            fluid.layers.elementwise_mul(input_emb, true_emb_w),
            dim=1,
            keep_dim=True),
        true_emb_b)

    input_emb_re = fluid.layers.reshape(
        input_emb, shape=[-1, 1, sparse_feature_dim])
    neg_matmul = fluid.layers.matmul(
        input_emb_re, neg_emb_w, transpose_y=True)
    neg_logits = fluid.layers.elementwise_add(
        fluid.layers.reshape(neg_matmul, shape=[-1, neg_num]),
        neg_emb_b_vec)

    label_ones = fluid.layers.fill_constant_batch_size_like(
        true_logits, shape=[-1, 1], value=1.0, dtype='float32')
    label_zeros = fluid.layers.fill_constant_batch_size_like(
        true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')

    true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
                                                               label_ones)
    neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(neg_logits,
                                                              label_zeros)
    cost = fluid.layers.elementwise_add(
        fluid.layers.reduce_sum(true_xent, dim=1),
        fluid.layers.reduce_sum(neg_xent, dim=1))
    self.avg_cost = fluid.layers.reduce_mean(cost)

    global_right_cnt = fluid.layers.create_global_var(
        name="global_right_cnt",
        persistable=True,
        dtype='float32',
        shape=[1],
        value=0)
    global_total_cnt = fluid.layers.create_global_var(
        name="global_total_cnt",
        persistable=True,
        dtype='float32',
        shape=[1],
        value=0)
    global_right_cnt.stop_gradient = True
    global_total_cnt.stop_gradient = True
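# Hedged aside: a minimal NumPy rendition of the skip-gram negative-sampling
# logits computed in net() above, assuming the embeddings have already been
# looked up. The names mirror the variables above; shapes are illustrative.
import numpy as np

def nce_logits_sketch(input_emb, true_emb_w, true_emb_b, neg_emb_w, neg_emb_b):
    # input_emb:  [batch, dim]       centre-word embedding
    # true_emb_w: [batch, dim]       positive context-word weights
    # true_emb_b: [batch, 1]         positive context-word biases
    # neg_emb_w:  [batch, neg, dim]  negative-sample weights
    # neg_emb_b:  [batch, neg]       negative-sample biases
    true_logits = np.sum(input_emb * true_emb_w, axis=1,
                         keepdims=True) + true_emb_b
    neg_logits = np.einsum('bd,bnd->bn', input_emb, neg_emb_w) + neg_emb_b
    return true_logits, neg_logits  # labels: ones for true, zeros for neg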
def net(self, inputs, is_infer=False):
    init_value_ = 0.1
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False

    # ------------------------- network input --------------------------
    raw_feat_idx = self._sparse_data_var[1]
    raw_feat_value = self._dense_data_var[0]
    self.label = self._sparse_data_var[0]

    feat_idx = raw_feat_idx
    feat_value = fluid.layers.reshape(
        raw_feat_value, [-1, self.num_field, 1])  # None * num_field * 1

    first_weights_re = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        is_distributed=is_distributed,
        dtype='float32',
        size=[self.sparse_feature_number + 1, 1],
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_),
            regularizer=fluid.regularizer.L1DecayRegularizer(self.reg)))
    first_weights = fluid.layers.reshape(
        first_weights_re,
        shape=[-1, self.num_field, 1])  # None * num_field * 1
    y_first_order = fluid.layers.reduce_sum((first_weights * feat_value), 1)

    # ------------------------- second order term --------------------------
    feat_embeddings_re = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        is_distributed=is_distributed,
        dtype='float32',
        size=[self.sparse_feature_number + 1, self.sparse_feature_dim],
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0,
                scale=init_value_ /
                math.sqrt(float(self.sparse_feature_dim)))))
    feat_embeddings = fluid.layers.reshape(
        feat_embeddings_re,
        shape=[-1, self.num_field, self.sparse_feature_dim
               ])  # None * num_field * embedding_size
    feat_embeddings = feat_embeddings * feat_value  # None * num_field * embedding_size

    # sum_square part
    summed_features_emb = fluid.layers.reduce_sum(
        feat_embeddings, 1)  # None * embedding_size
    summed_features_emb_square = fluid.layers.square(
        summed_features_emb)  # None * embedding_size

    # square_sum part
    squared_features_emb = fluid.layers.square(
        feat_embeddings)  # None * num_field * embedding_size
    squared_sum_features_emb = fluid.layers.reduce_sum(
        squared_features_emb, 1)  # None * embedding_size

    y_second_order = 0.5 * fluid.layers.reduce_sum(
        summed_features_emb_square - squared_sum_features_emb,
        1,
        keep_dim=True)  # None * 1

    # ------------------------- DNN --------------------------
    y_dnn = fluid.layers.reshape(
        feat_embeddings, [-1, self.num_field * self.sparse_feature_dim])
    for s in self.layer_sizes:
        y_dnn = fluid.layers.fc(
            input=y_dnn,
            size=s,
            act=self.act,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_ / math.sqrt(float(10)))),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_)))
    y_dnn = fluid.layers.fc(
        input=y_dnn,
        size=1,
        act=None,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_)),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_)))

    # ------------------------- DeepFM --------------------------
    self.predict = fluid.layers.sigmoid(y_first_order + y_second_order +
                                        y_dnn)
    cost = fluid.layers.log_loss(
        input=self.predict, label=fluid.layers.cast(self.label, "float32"))
    avg_cost = fluid.layers.reduce_sum(cost)
    self._cost = avg_cost

    predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
    label_int = fluid.layers.cast(self.label, 'int64')
    auc_var, batch_auc_var, _ = fluid.layers.auc(
        input=predict_2d, label=label_int, slide_steps=0)
    self._metrics["AUC"] = auc_var
    self._metrics["BATCH_AUC"] = batch_auc_var
    if is_infer:
        self._infer_results["AUC"] = auc_var
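# Hedged aside: the "sum_square minus square_sum" step above is the usual FM
# identity, sum_{i<j} <v_i, v_j> = 0.5 * (||sum_i v_i||^2 - sum_i ||v_i||^2),
# applied per embedding dimension. A minimal NumPy check (illustrative only):
import numpy as np

def fm_second_order_check(feat_embeddings):
    # feat_embeddings: [num_field, dim], one sample after the value scaling
    summed = feat_embeddings.sum(axis=0)  # [dim]
    fast = 0.5 * (np.square(summed) -
                  np.square(feat_embeddings).sum(axis=0)).sum()
    slow = sum(feat_embeddings[i] @ feat_embeddings[j]
               for i in range(len(feat_embeddings))
               for j in range(i + 1, len(feat_embeddings)))
    assert np.isclose(fast, slow)  # O(n*d) trick matches the pairwise sum
    return fast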
def deepfm_net(self):
    init_value_ = 0.1
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
    sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number", None, self._namespace)
    sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim", None, self._namespace)

    # ------------------------- network input --------------------------
    num_field = envs.get_global_env("hyper_parameters.num_field", None,
                                    self._namespace)
    raw_feat_idx = self._sparse_data_var[1]
    raw_feat_value = self._dense_data_var[0]
    self.label = self._sparse_data_var[0]

    feat_idx = raw_feat_idx
    feat_value = fluid.layers.reshape(
        raw_feat_value, [-1, num_field, 1])  # None * num_field * 1

    reg = envs.get_global_env("hyper_parameters.reg", 1e-4,
                              self._namespace)
    first_weights_re = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        is_distributed=is_distributed,
        dtype='float32',
        size=[sparse_feature_number + 1, 1],
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_),
            regularizer=fluid.regularizer.L1DecayRegularizer(reg)))
    first_weights = fluid.layers.reshape(
        first_weights_re, shape=[-1, num_field, 1])  # None * num_field * 1
    y_first_order = fluid.layers.reduce_sum((first_weights * feat_value), 1)

    # ------------------------- second order term --------------------------
    feat_embeddings_re = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        is_distributed=is_distributed,
        dtype='float32',
        size=[sparse_feature_number + 1, sparse_feature_dim],
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0,
                scale=init_value_ / math.sqrt(float(sparse_feature_dim)))))
    feat_embeddings = fluid.layers.reshape(
        feat_embeddings_re,
        shape=[-1, num_field,
               sparse_feature_dim])  # None * num_field * embedding_size
    feat_embeddings = feat_embeddings * feat_value  # None * num_field * embedding_size

    # sum_square part
    summed_features_emb = fluid.layers.reduce_sum(
        feat_embeddings, 1)  # None * embedding_size
    summed_features_emb_square = fluid.layers.square(
        summed_features_emb)  # None * embedding_size

    # square_sum part
    squared_features_emb = fluid.layers.square(
        feat_embeddings)  # None * num_field * embedding_size
    squared_sum_features_emb = fluid.layers.reduce_sum(
        squared_features_emb, 1)  # None * embedding_size

    y_second_order = 0.5 * fluid.layers.reduce_sum(
        summed_features_emb_square - squared_sum_features_emb,
        1,
        keep_dim=True)  # None * 1

    # ------------------------- DNN --------------------------
    layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None,
                                      self._namespace)
    act = envs.get_global_env("hyper_parameters.act", None,
                              self._namespace)
    y_dnn = fluid.layers.reshape(feat_embeddings,
                                 [-1, num_field * sparse_feature_dim])
    for s in layer_sizes:
        y_dnn = fluid.layers.fc(
            input=y_dnn,
            size=s,
            act=act,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_ / math.sqrt(float(10)))),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_)))
    y_dnn = fluid.layers.fc(
        input=y_dnn,
        size=1,
        act=None,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_)),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_)))

    # ------------------------- DeepFM --------------------------
    self.predict = fluid.layers.sigmoid(y_first_order + y_second_order +
                                        y_dnn)
def net(self, inputs, is_infer=False):
    raw_feat_idx = self._sparse_data_var[1]
    raw_feat_value = self._dense_data_var[0]
    self.label = self._sparse_data_var[0]

    init_value_ = 0.1
    initer = fluid.initializer.TruncatedNormalInitializer(
        loc=0.0, scale=init_value_)

    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False

    # ------------------------- network input --------------------------
    feat_idx = raw_feat_idx
    feat_value = fluid.layers.reshape(
        raw_feat_value, [-1, self.num_field, 1])  # None * num_field * 1

    feat_embeddings = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        dtype='float32',
        size=[self.sparse_feature_number + 1, self.sparse_feature_dim],
        padding_idx=0,
        param_attr=fluid.ParamAttr(initializer=initer))
    feat_embeddings = fluid.layers.reshape(
        feat_embeddings,
        [-1, self.num_field, self.sparse_feature_dim
         ])  # None * num_field * embedding_size
    feat_embeddings = feat_embeddings * feat_value  # None * num_field * embedding_size

    # -------------------- linear --------------------
    weights_linear = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        dtype='float32',
        size=[self.sparse_feature_number + 1, 1],
        padding_idx=0,
        param_attr=fluid.ParamAttr(initializer=initer))
    weights_linear = fluid.layers.reshape(
        weights_linear, [-1, self.num_field, 1])  # None * num_field * 1
    b_linear = fluid.layers.create_parameter(
        shape=[1],
        dtype='float32',
        default_initializer=fluid.initializer.ConstantInitializer(value=0))
    y_linear = fluid.layers.reduce_sum(
        (weights_linear * feat_value), 1) + b_linear

    # -------------------- CIN --------------------
    Xs = [feat_embeddings]
    last_s = self.num_field
    for s in self.layer_sizes_cin:
        # calculate Z^(k+1) with X^k and X^0
        X_0 = fluid.layers.reshape(
            fluid.layers.transpose(Xs[0], [0, 2, 1]),
            [-1, self.sparse_feature_dim, self.num_field,
             1])  # None, embedding_size, num_field, 1
        X_k = fluid.layers.reshape(
            fluid.layers.transpose(Xs[-1], [0, 2, 1]),
            [-1, self.sparse_feature_dim, 1,
             last_s])  # None, embedding_size, 1, last_s
        Z_k_1 = fluid.layers.matmul(
            X_0, X_k)  # None, embedding_size, num_field, last_s

        # compress Z^(k+1) to X^(k+1)
        Z_k_1 = fluid.layers.reshape(
            Z_k_1, [-1, self.sparse_feature_dim, last_s * self.num_field
                    ])  # None, embedding_size, last_s*num_field
        Z_k_1 = fluid.layers.transpose(
            Z_k_1, [0, 2, 1])  # None, last_s*num_field, embedding_size
        Z_k_1 = fluid.layers.reshape(
            Z_k_1,
            [-1, last_s * self.num_field, 1, self.sparse_feature_dim]
        )  # None, last_s*num_field, 1, embedding_size  (None, channel_in, h, w)
        X_k_1 = fluid.layers.conv2d(
            Z_k_1,
            num_filters=s,
            filter_size=(1, 1),
            act=None,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=initer))  # None, s, 1, embedding_size
        X_k_1 = fluid.layers.reshape(
            X_k_1,
            [-1, s, self.sparse_feature_dim])  # None, s, embedding_size

        Xs.append(X_k_1)
        last_s = s

    # sum pooling
    y_cin = fluid.layers.concat(
        Xs[1:], 1)  # None, (num_field++), embedding_size
    y_cin = fluid.layers.reduce_sum(y_cin, -1)  # None, (num_field++)
    y_cin = fluid.layers.fc(
        input=y_cin,
        size=1,
        act=None,
        param_attr=fluid.ParamAttr(initializer=initer),
        bias_attr=None)
    y_cin = fluid.layers.reduce_sum(y_cin, dim=-1, keep_dim=True)

    # -------------------- DNN --------------------
    y_dnn = fluid.layers.reshape(
        feat_embeddings, [-1, self.num_field * self.sparse_feature_dim])
    for s in self.layer_sizes_dnn:
        y_dnn = fluid.layers.fc(
            input=y_dnn,
            size=s,
            act=self.act,
            param_attr=fluid.ParamAttr(initializer=initer),
            bias_attr=None)
    y_dnn = fluid.layers.fc(
        input=y_dnn,
        size=1,
        act=None,
        param_attr=fluid.ParamAttr(initializer=initer),
        bias_attr=None)

    # ------------------- xDeepFM ------------------
    self.predict = fluid.layers.sigmoid(y_linear + y_cin + y_dnn)

    cost = fluid.layers.log_loss(
        input=self.predict,
        label=fluid.layers.cast(self.label, "float32"),
        epsilon=0.0000001)
    batch_cost = fluid.layers.reduce_mean(cost)
    self._cost = batch_cost

    # for auc
    predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
    label_int = fluid.layers.cast(self.label, 'int64')
    auc_var, batch_auc_var, _ = fluid.layers.auc(
        input=predict_2d, label=label_int, slide_steps=0)
    self._metrics["AUC"] = auc_var
    self._metrics["BATCH_AUC"] = batch_auc_var
    if is_infer:
        self._infer_results["AUC"] = auc_var
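# Hedged aside: each CIN step above forms, per embedding dimension, the outer
# product of the X^0 and X^k field axes, then compresses the field pairs with
# a learned 1x1 convolution. An equivalent per-sample NumPy sketch (W is an
# illustrative weight matrix, not the trained parameter):
import numpy as np

def cin_layer_sketch(X0, Xk, W):
    # X0: [m, d] original field embeddings; Xk: [h, d] previous CIN output
    # W:  [s, m * h] compression weights for s output feature maps
    Z = np.einsum('md,hd->mhd', X0, Xk).reshape(-1, X0.shape[1])  # [m*h, d]
    return W @ Z  # [s, d], the next layer X^(k+1)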
def xdeepfm_net(self):
    init_value_ = 0.1
    initer = fluid.initializer.TruncatedNormalInitializer(
        loc=0.0, scale=init_value_)

    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
    sparse_feature_number = envs.get_global_env(
        "hyper_parameters.sparse_feature_number", None, self._namespace)
    sparse_feature_dim = envs.get_global_env(
        "hyper_parameters.sparse_feature_dim", None, self._namespace)

    # ------------------------- network input --------------------------
    num_field = envs.get_global_env("hyper_parameters.num_field", None,
                                    self._namespace)
    raw_feat_idx = self._sparse_data_var[1]
    raw_feat_value = self._dense_data_var[0]
    self.label = self._sparse_data_var[0]

    feat_idx = raw_feat_idx
    feat_value = fluid.layers.reshape(
        raw_feat_value, [-1, num_field, 1])  # None * num_field * 1

    feat_embeddings = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        dtype='float32',
        size=[sparse_feature_number + 1, sparse_feature_dim],
        padding_idx=0,
        param_attr=fluid.ParamAttr(initializer=initer))
    feat_embeddings = fluid.layers.reshape(
        feat_embeddings,
        [-1, num_field,
         sparse_feature_dim])  # None * num_field * embedding_size
    feat_embeddings = feat_embeddings * feat_value  # None * num_field * embedding_size

    # -------------------- linear --------------------
    weights_linear = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        dtype='float32',
        size=[sparse_feature_number + 1, 1],
        padding_idx=0,
        param_attr=fluid.ParamAttr(initializer=initer))
    weights_linear = fluid.layers.reshape(
        weights_linear, [-1, num_field, 1])  # None * num_field * 1
    b_linear = fluid.layers.create_parameter(
        shape=[1],
        dtype='float32',
        default_initializer=fluid.initializer.ConstantInitializer(value=0))
    y_linear = fluid.layers.reduce_sum(
        (weights_linear * feat_value), 1) + b_linear

    # -------------------- CIN --------------------
    layer_sizes_cin = envs.get_global_env(
        "hyper_parameters.layer_sizes_cin", None, self._namespace)
    Xs = [feat_embeddings]
    last_s = num_field
    for s in layer_sizes_cin:
        # calculate Z^(k+1) with X^k and X^0
        X_0 = fluid.layers.reshape(
            fluid.layers.transpose(Xs[0], [0, 2, 1]),
            [-1, sparse_feature_dim, num_field,
             1])  # None, embedding_size, num_field, 1
        X_k = fluid.layers.reshape(
            fluid.layers.transpose(Xs[-1], [0, 2, 1]),
            [-1, sparse_feature_dim, 1,
             last_s])  # None, embedding_size, 1, last_s
        Z_k_1 = fluid.layers.matmul(
            X_0, X_k)  # None, embedding_size, num_field, last_s

        # compress Z^(k+1) to X^(k+1)
        Z_k_1 = fluid.layers.reshape(
            Z_k_1, [-1, sparse_feature_dim, last_s * num_field
                    ])  # None, embedding_size, last_s*num_field
        Z_k_1 = fluid.layers.transpose(
            Z_k_1, [0, 2, 1])  # None, last_s*num_field, embedding_size
        Z_k_1 = fluid.layers.reshape(
            Z_k_1, [-1, last_s * num_field, 1, sparse_feature_dim]
        )  # None, last_s*num_field, 1, embedding_size  (None, channel_in, h, w)
        X_k_1 = fluid.layers.conv2d(
            Z_k_1,
            num_filters=s,
            filter_size=(1, 1),
            act=None,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=initer))  # None, s, 1, embedding_size
        X_k_1 = fluid.layers.reshape(
            X_k_1, [-1, s, sparse_feature_dim])  # None, s, embedding_size

        Xs.append(X_k_1)
        last_s = s

    # sum pooling
    y_cin = fluid.layers.concat(
        Xs[1:], 1)  # None, (num_field++), embedding_size
    y_cin = fluid.layers.reduce_sum(y_cin, -1)  # None, (num_field++)
    y_cin = fluid.layers.fc(
        input=y_cin,
        size=1,
        act=None,
        param_attr=fluid.ParamAttr(initializer=initer),
        bias_attr=None)
    y_cin = fluid.layers.reduce_sum(y_cin, dim=-1, keep_dim=True)

    # -------------------- DNN --------------------
    layer_sizes_dnn = envs.get_global_env(
        "hyper_parameters.layer_sizes_dnn", None, self._namespace)
    act = envs.get_global_env("hyper_parameters.act", None,
                              self._namespace)
    y_dnn = fluid.layers.reshape(feat_embeddings,
                                 [-1, num_field * sparse_feature_dim])
    for s in layer_sizes_dnn:
        y_dnn = fluid.layers.fc(
            input=y_dnn,
            size=s,
            act=act,
            param_attr=fluid.ParamAttr(initializer=initer),
            bias_attr=None)
    y_dnn = fluid.layers.fc(
        input=y_dnn,
        size=1,
        act=None,
        param_attr=fluid.ParamAttr(initializer=initer),
        bias_attr=None)

    # ------------------- xDeepFM ------------------
    self.predict = fluid.layers.sigmoid(y_linear + y_cin + y_dnn)
def net(self, inputs, is_infer=False):
    init_value_ = 0.1
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False

    # ------------------------- network input --------------------------
    raw_feat_idx = self._sparse_data_var[1]
    raw_feat_value = self._dense_data_var[0]
    self.label = self._sparse_data_var[0]

    feat_idx = raw_feat_idx
    feat_value = fluid.layers.reshape(
        raw_feat_value, [-1, self.num_field, 1])  # None * num_field * 1

    # ------------------------- Embedding --------------------------
    feat_embeddings_re = fluid.embedding(
        input=feat_idx,
        is_sparse=True,
        is_distributed=is_distributed,
        dtype='float32',
        size=[self.sparse_feature_number + 1, self.sparse_feature_dim],
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0,
                scale=init_value_ /
                math.sqrt(float(self.sparse_feature_dim)))))
    feat_embeddings = fluid.layers.reshape(
        feat_embeddings_re,
        shape=[-1, self.num_field, self.sparse_feature_dim
               ])  # None * num_field * embedding_size
    feat_embeddings = feat_embeddings * feat_value  # None * num_field * embedding_size

    inter_input = feat_embeddings

    # ------------------------- interacting layer --------------------------
    for _ in range(self.n_interacting_layers):
        interacting_layer_out = self.interacting_layer(inter_input)
        inter_input = interacting_layer_out

    # ------------------------- DNN --------------------------
    dnn_input = fluid.layers.flatten(interacting_layer_out, axis=1)
    y_dnn = fluid.layers.fc(
        input=dnn_input,
        size=1,
        act=None,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_)),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.TruncatedNormalInitializer(
                loc=0.0, scale=init_value_)))

    self.predict = fluid.layers.sigmoid(y_dnn)

    cost = fluid.layers.log_loss(
        input=self.predict, label=fluid.layers.cast(self.label, "float32"))
    avg_cost = fluid.layers.reduce_sum(cost)
    self._cost = avg_cost

    predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
    label_int = fluid.layers.cast(self.label, 'int64')
    auc_var, batch_auc_var, _ = fluid.layers.auc(
        input=predict_2d, label=label_int, slide_steps=0)
    self._metrics["AUC"] = auc_var
    self._metrics["BATCH_AUC"] = batch_auc_var
    if is_infer:
        self._infer_results["AUC"] = auc_var
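# Hedged aside: interacting_layer is defined elsewhere in this model; in the
# AutoInt paper it is multi-head self-attention over the field embeddings. A
# minimal single-head NumPy sketch with illustrative (untrained) projections,
# not the model's actual implementation:
import numpy as np

def interacting_layer_sketch(E, Wq, Wk, Wv):
    # E: [num_field, d]; Wq, Wk, Wv: [d, d_att]
    Q, K, V = E @ Wq, E @ Wk, E @ Wv
    scores = Q @ K.T / np.sqrt(K.shape[1])  # field-to-field attention scores
    attn = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
    return attn @ V  # [num_field, d_att] re-weighted field representations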