def create_emb(vocab_name, emb_name=None):
  if emb_name is None:
    emb_name = vocab_name
  vs = gezi.get('vocab_sizes')
  # Note: do not use keras.layers.Embedding here, because trainable=False has no
  # effect unless a keras optimizer is used.
  # /juypter/dump-doc-lookup.ipynb
  # melt maintains the variable itself; verified to work, and it also supports
  # pretrain mapping and partially trainable embeddings.
  # https://github.com/keras-team/keras/issues/4952
  Embedding = mt.layers.PrEmbedding
  embeddings_initializer = 'uniform'
  pretrain_path = None
  trainable = True
  if vocab_name == 'uid':
    trainable = FLAGS.train_uid_emb
  if vocab_name == 'did':
    trainable = FLAGS.train_did_emb
  kwargs = {}

  def _load_emb(name, path, trainable):
    emb = np.load(path)
    logging.info(name, emb.shape, vs[name][0])
    assert emb.shape[0] == vs[name][0], f'{emb.shape[0]} {vs[name][0]}'
    # emb = emb[:vs[name][0]]
    # embeddings_initializer = tf.constant_initializer(emb)
    embeddings_initializer = emb
    pretrain_path = path
    # vs[name][0] = emb.shape[0]  # change here to reset if vocab size is larger than emb height
    # This is expected behavior. trainable=False is a keras concept; it doesn't automatically
    # override the graph and op behavior of tf 1.x.
    # In order to do what you want, you should use the new style tf.keras.optimizers optimizers
    # (or GradientTape) which accept a list of variables to differentiate with respect to and
    # the .trainable_weights attribute of Layers and Models which will filter based on .trainable.
    # TODO PrEmbedding seems slow, so just use Embedding; but then trainable only works in keras
    # mode with a keras optimizer, otherwise trainable=False has no effect.
    Embedding = mt.layers.PrEmbedding
    kwargs['base_dim'] = emb.shape[1]
    # Embedding = keras.layers.Embedding
    return Embedding, embeddings_initializer, pretrain_path, trainable

  if FLAGS.use_entity_pretrain and vocab_name == 'entity':
    Embedding, embeddings_initializer, pretrain_path, trainable = _load_emb(
        'entity', FLAGS.entity_pretrain, FLAGS.train_entity_emb)
  if FLAGS.use_word_pretrain and vocab_name == 'word':
    Embedding, embeddings_initializer, pretrain_path, trainable = _load_emb(
        'word', FLAGS.word_pretrain, FLAGS.train_word_emb)
  if FLAGS.use_did_pretrain and vocab_name == 'did':
    Embedding, embeddings_initializer, pretrain_path, trainable = _load_emb(
        'did', FLAGS.did_pretrain, FLAGS.train_did_emb)

  emb_height = vs[vocab_name][0] if not FLAGS.slim_emb_height else vs[vocab_name][1]
  if vocab_name == 'uid':
    emb_height = vs[vocab_name][1]
  logging.info(vocab_name, vs[vocab_name][0], vs[vocab_name][1],
               f'({emb_height}, {FLAGS.emb_size})', pretrain_path,
               embeddings_initializer, trainable)
  # TODO With keras Embedding the name is ok, but PrEmbedding repeats a scope level, e.g. cat_emb/cat_emb
  return Embedding(emb_height,
                   FLAGS.emb_size,
                   embeddings_initializer=embeddings_initializer,
                   trainable=trainable,
                   train_size=vs[vocab_name][1],
                   name=f'{emb_name}_emb',
                   **kwargs)
def create_emb(vocab_name, emb_name=None):
  if emb_name is None:
    emb_name = vocab_name
  vs = gezi.get('vocab_sizes')
  # Embedding = melt.layers.VEmbedding if FLAGS.use_vocab_emb else keras.layers.Embedding
  Embedding = melt.layers.PrEmbedding
  embeddings_initializer = 'uniform'
  trainable = True
  if vocab_name == 'uid':
    trainable = FLAGS.train_uid_emb
  if vocab_name == 'did':
    trainable = FLAGS.train_did_emb
  kwargs = {}

  def _load_emb(name, path, trainable):
    emb = np.load(path)
    logging.info(name, emb.shape, vs[name][0])
    assert emb.shape[0] == vs[name][0], f'{emb.shape[0]} {vs[name][0]}'
    # emb = emb[:vs[name][0]]
    embeddings_initializer = tf.constant_initializer(emb)
    # vs[name][0] = emb.shape[0]  # change here to reset if vocab size is larger than emb height
    # This is expected behavior. trainable=False is a keras concept; it doesn't automatically
    # override the graph and op behavior of tf 1.x.
    # In order to do what you want, you should use the new style tf.keras.optimizers optimizers
    # (or GradientTape) which accept a list of variables to differentiate with respect to and
    # the .trainable_weights attribute of Layers and Models which will filter based on .trainable.
    # TODO PrEmbedding seems slow, so just use Embedding; but then trainable only works in keras
    # mode with a keras optimizer, otherwise trainable=False has no effect.
    Embedding = melt.layers.PrEmbedding
    kwargs['base_dim'] = emb.shape[1]
    # Embedding = keras.layers.Embedding
    return Embedding, embeddings_initializer, trainable

  if FLAGS.use_entity_pretrain and vocab_name == 'entity':
    Embedding, embeddings_initializer, trainable = _load_emb(
        'entity', FLAGS.entity_pretrain, FLAGS.train_entity_emb)
  if FLAGS.use_word_pretrain and vocab_name == 'word':
    Embedding, embeddings_initializer, trainable = _load_emb(
        'word', FLAGS.word_pretrain, FLAGS.train_word_emb)
  if FLAGS.use_did_pretrain and vocab_name == 'did':
    Embedding, embeddings_initializer, trainable = _load_emb(
        'did', FLAGS.did_pretrain, FLAGS.train_did_emb)

  emb_height = vs[vocab_name][0] if not FLAGS.slim_emb_height else vs[vocab_name][1]
  logging.info(vocab_name, vs[vocab_name][0], vs[vocab_name][1], emb_height,
               FLAGS.emb_size, embeddings_initializer, trainable)
  return Embedding(emb_height,
                   FLAGS.emb_size,
                   embeddings_initializer=embeddings_initializer,
                   trainable=trainable,
                   name=f'{emb_name}_emb',
                   **kwargs)
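# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original code): illustrates the trainable=False
# caveat described in the comments of create_emb above, using only plain tf.keras
# (no mt/melt); the helper name and sizes are invented for illustration.
# With a tf.keras optimizer the frozen embedding stays out of trainable_weights and
# receives no gradient updates; a tf 1.x-style training loop that ignores .trainable
# would still update it, which is why the custom PrEmbedding path is used above.
def _frozen_embedding_demo():
  import numpy as np
  import tensorflow as tf

  pretrained = np.random.rand(100, 8).astype('float32')  # stand-in for np.load(path)
  emb = tf.keras.layers.Embedding(
      100, 8,
      embeddings_initializer=tf.constant_initializer(pretrained),
      trainable=False)  # frozen: excluded from trainable_weights

  inp = tf.keras.Input(shape=(), dtype='int32')
  out = tf.keras.layers.Dense(1)(emb(inp))
  model = tf.keras.Model(inp, out)
  model.compile(optimizer='adam', loss='mse')

  # Only the Dense kernel/bias show up; the embedding matrix is excluded.
  print([v.name for v in model.trainable_weights])
  return model
# ---------------------------------------------------------------------------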
def adjust(features, subset):
  if 'hist_len' not in features:
    try:
      features['hist_len'] = mt.length(features['history'])
    except Exception:
      features['hist_len'] = tf.ones_like(features['did'])

  if FLAGS.max_history:
    for key in features:
      if 'history' in key:
        max_history = FLAGS.max_history
        if 'enti' in key:
          max_history *= 2
        if not FLAGS.fixed_pad:
          features[key] = features[key][:, :max_history]
        else:
          features[key] = mt.pad(features[key], max_history)

  # Note: news side information is looked up by nid; did is only used as an id
  # feature and may be masked.
  features['ori_did'] = features['did']
  features['ori_history'] = features['history']
  if 'impressions' in features:
    features['ori_impressions'] = features['impressions']

  features['did'] = mask_dids(features['did'], features['did_in_train'],
                              subset, FLAGS.test_all_mask)
  features['uid'] = mask_uids(features['uid'], subset == 'train')

  if 'history' in features:
    features['history'] = unk_aug(features['history'], subset == 'train')

  mask_negative_weights(features, subset == 'train')

  vs = gezi.get('vocab_sizes')
  if FLAGS.min_count_unk and FLAGS.min_count:
    features['uid'] = get_id(features['uid'], vs['uid'][1])
    features['did'] = get_id(features['did'], vs['did'][1])
    if FLAGS.mask_history:
      features['history'] = get_id(features['history'], vs['did'][1])
    if 'impressions' in features:
      features['impressions'] = get_id(features['impressions'], vs['did'][1])

  if vs['uid'][1] < vs['uid'][0]:
    features['uid'] = get_id(features['uid'], vs['uid'][1])

  return features
def adjust(features, subset):
  if 'hist_len' not in features:
    try:
      features['hist_len'] = melt.length(features['history'])
    except Exception:
      features['hist_len'] = tf.ones_like(features['did'])

  if FLAGS.max_history:
    for key in features:
      if key.startswith('history'):
        max_history = FLAGS.max_history
        if 'entity' in key:
          max_history *= 2
        features[key] = features[key][:, :max_history]

  # Note: news side information is looked up by nid; did is only used as an id
  # feature and may be masked.
  features['ori_did'] = features['did']
  features['ori_history'] = features['history']
  if 'impressions' in features:
    features['ori_impressions'] = features['impressions']

  features['did'] = mask_dids(features['did'], features['did_in_train'],
                              subset, FLAGS.test_all_mask)
  features['uid'] = mask_uids(features['uid'], subset == 'train')

  try:
    features['history'] = unk_aug(features['history'], subset == 'train')
  except Exception:
    pass

  mask_negative_weights(features, subset == 'train')

  if FLAGS.min_count_unk and FLAGS.min_count:
    vs = gezi.get('vocab_sizes')
    features['uid'] = get_id(features['uid'], vs['uid'][1])
    features['did'] = get_id(features['did'], vs['did'][1])
    if FLAGS.mask_history:
      features['history'] = get_id(features['history'], vs['did'][1])
    if 'impressions' in features:
      features['impressions'] = get_id(features['impressions'], vs['did'][1])

  return features
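# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original code): one plausible reading of the
# get_id(...) calls in adjust above, where vs[name][1] is the trainable vocab
# height and ids at or beyond it are folded into a shared UNK bucket (assumed to
# be id 1 here). The real get_id in this repo may differ; this only illustrates
# the shape of the operation.
def _fold_rare_ids(ids, train_vocab_size, unk_id=1):
  import tensorflow as tf
  ids = tf.convert_to_tensor(ids)
  unk = tf.fill(tf.shape(ids), tf.cast(unk_id, ids.dtype))
  return tf.where(ids < train_vocab_size, ids, unk)
# Example: _fold_rare_ids([[3, 7, 12]], train_vocab_size=10) -> [[3, 7, 1]]
# ---------------------------------------------------------------------------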
def __init__(self):
  super(Model, self).__init__()
  self.mode = 'train'
  self.input_ = {}

  def _emb(vocab_name, emb_name=None):
    return util.create_emb(vocab_name, emb_name)

  self.uemb = _emb('uid')
  self.demb = _emb('did')
  self.cat_emb = _emb('cat')
  self.scat_emb = _emb('sub_cat')
  self.entity_emb = _emb('entity')
  self.entity_type_emb = _emb('entity_type')
  self.word_emb = _emb('word')

  self.hour_emb = Embedding(24, FLAGS.emb_size, name='hour_emb')
  self.weekday_emb = Embedding(7, FLAGS.emb_size, name='weekday_emb')
  self.fresh_hour_emb = Embedding(300, FLAGS.emb_size, name='fresh_hour_emb')  # 7 * 24
  self.fresh_day_emb = Embedding(50, FLAGS.emb_size, name='fresh_day_emb')
  self.position_emb = Embedding(300, FLAGS.emb_size, name='position_emb')

  self.title_lookup = melt.layers.LookupArray(FLAGS.title_lookup)
  self.doc_lookup = melt.layers.LookupArray(FLAGS.doc_lookup)

  self.title_encoder = TitleEncoder(self.word_emb)
  self.titles_encoder = TitlesEncoder(self.title_encoder)

  self.sum_pooling = melt.layers.SumPooling()
  self.mean_pooling = melt.layers.MeanPooling()
  self.pooling = melt.layers.Pooling(FLAGS.pooling)
  self.feat_pooling = melt.layers.Pooling(FLAGS.feat_pooling)
  self.his_simple_pooling = melt.layers.Pooling(FLAGS.his_simple_pooling)

  self.dense = Dense(1) if not FLAGS.use_multi_dropout else melt.layers.MultiDropout(
      1, drop_rate=0.3)
  self.batch_norm = BatchNormalization()
  self.dropout = keras.layers.Dropout(FLAGS.dropout)

  # --arch-mlp-bot="13-512-256-64-16" --arch-mlp-top="512-256-1"
  activation = FLAGS.activation
  mlp_dims = [FLAGS.emb_size * 2, FLAGS.emb_size] if not FLAGS.big_mlp else [
      FLAGS.emb_size * 4, FLAGS.emb_size * 2, FLAGS.emb_size
  ]
  self.dense_mlp = melt.layers.MLP(mlp_dims,
                                   activation=activation,
                                   drop_rate=FLAGS.mlp_dropout,
                                   name='dense_mlp')
  mlp_dims = [512, 256, 64] if not FLAGS.big_mlp else [1024, 512, 256]
  self.mlp = melt.layers.MLP(mlp_dims,
                             activation=activation,
                             drop_rate=FLAGS.mlp_dropout,
                             name='mlp')

  self.his_encoder = util.get_encoder(FLAGS.his_encoder)
  self.his_dense = keras.layers.Dense(FLAGS.hidden_size)
  self.his_pooling = util.get_att_pooling(FLAGS.his_pooling)
  self.his_pooling2 = util.get_att_pooling(FLAGS.his_pooling2)
  self.cur_dense = keras.layers.Dense(FLAGS.hidden_size)

  if FLAGS.his_strategy.startswith('bst'):
    self.transformer = melt.layers.transformer.Encoder(
        num_layers=1,
        d_model=FLAGS.hidden_size,
        num_heads=FLAGS.num_heads,
        dff=FLAGS.hidden_size,
        maximum_position_encoding=None,
        activation=FLAGS.transformer_activation,
        rate=FLAGS.transformer_dropout)

  self.fusion = melt.layers.SemanticFusion(drop_rate=0.1)

  if FLAGS.feat_pooling == 'cin':
    from deepctr.layers.interaction import CIN
    self.cin = CIN((128, 128,), 'relu', True, 0, 1024)
    self.feat_pooling = self.cin

  if FLAGS.aux_loss_rate or FLAGS.lm_target:
    vsize = gezi.get('vocab_sizes')['vid'][0]
    # hidden_size = FLAGS.hidden_size if FLAGS.his_encoder in ['lstm', 'gru'] else int(FLAGS.hidden_size / 2)
    hidden_size = int(FLAGS.hidden_size / 2)
    self.sampled_weight = self.add_weight(
        name='sampled_weight',
        shape=(vsize, hidden_size),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.sampled_bias = self.add_weight(
        name='sampled_bias',
        shape=(vsize,),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.softmax_loss_function = melt.seq2seq.gen_sampled_softmax_loss_function(
        5,
        vsize,
        weights=self.sampled_weight,
        biases=self.sampled_bias,
        log_uniform_sample=True,
        is_predict=False,
        sample_seed=1234)
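# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original code): sampled_weight / sampled_bias
# above are shaped exactly like the (weights, biases) that
# tf.nn.sampled_softmax_loss expects, which is the kind of op a
# gen_sampled_softmax_loss_function-style helper typically wraps for the
# aux/lm-target loss. The function name and arguments here are illustrative only.
def _sampled_softmax_loss_demo(hidden, labels, weights, biases, num_sampled=5):
  """hidden: [batch, hidden_size], labels: [batch] int ids,
  weights: [vocab, hidden_size], biases: [vocab]."""
  import tensorflow as tf
  return tf.nn.sampled_softmax_loss(
      weights=weights,
      biases=biases,
      labels=tf.expand_dims(labels, -1),  # expects [batch, num_true]
      inputs=hidden,
      num_sampled=num_sampled,
      num_classes=int(weights.shape[0]))
# ---------------------------------------------------------------------------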
def call(self, input):
  # TODO tf2 keras seems to auto-append a last dim so we need this
  mt.try_squeeze_dim(input)

  if not FLAGS.batch_parse:
    util.adjust(input, self.mode)

  self.embs = []
  self.feats = {}

  bs = mt.get_shape(input['did'], 0)

  def _add(feat, name):
    if _is_ok(name):
      self.feats[name] = feat
      self.embs += [feat]

  def _adds(feats, names):
    for feat, name in zip(feats, names):
      _add(feat, name)

  # -------------------------- user
  if FLAGS.use_uid:
    uemb = self.uemb(input['uid'])
    _add(uemb, 'uid')

  # -------------------------- doc
  if FLAGS.use_did:
    demb = self.demb(input['did'])
    _add(demb, 'did')

  # --------------------------- context
  if 'history' in input:
    hlen = mt.length(input['history'])
    hlen = tf.math.maximum(hlen, 1)

  if FLAGS.use_time_emb:
    _add(self.hour_emb(input['hour']), 'hour')
    _add(self.weekday_emb(input['weekday']), 'weekday')

  if FLAGS.use_fresh_emb:
    fresh = input['fresh']
    fresh_day = tf.cast(fresh / (3600 * 12), fresh.dtype)
    fresh_hour = tf.cast(fresh / 3600, fresh.dtype)
    _add(self.fresh_day_emb(fresh_day), 'fresh_day')
    _add(self.fresh_hour_emb(fresh_hour), 'fresh_hour')

  if FLAGS.use_position_emb:
    _add(self.position_emb(input['position']), 'position')

  if FLAGS.use_history:
    dids = input['history']
    if FLAGS.his_strategy == 'bst' or FLAGS.his_pooling == 'mhead':
      mask = tf.cast(tf.equal(dids, 0), dids.dtype)
      dids += mask
      hlen = tf.ones_like(hlen) * 50
    hembs = self.demb(dids)
    his_embs = hembs
    his_embs = self.his_encoder(his_embs, hlen)
    self.his_embs = his_embs
    his_emb = self.his_pooling(demb, his_embs, hlen)
    _add(his_emb, 'his_id')

  # --------------- doc info
  doc_feats = gezi.get('doc_feats')
  doc_feat_lens = gezi.get('doc_feat_lens')
  doc = mt.lookup_feats(input['ori_did'], self.doc_lookup, doc_feats, doc_feat_lens)

  cat = tf.squeeze(doc['cat'], -1)
  sub_cat = tf.squeeze(doc['sub_cat'], -1)
  # title_entities = doc['title_entities']
  # title_entity_types = doc['title_entity_types']
  # abstract_entities = doc['abstract_entities']
  # abstract_entity_types = doc['abstract_entity_types']
  title_entities = input['title_entities']
  title_entity_types = input['title_entity_types']
  abstract_entities = input['abstract_entities']
  abstract_entity_types = input['abstract_entity_types']
  # mt.length: not using it would be slow

  # prev_cat_emb = self.cat_emb(cat)
  # prev_scat_emb = self.scat_emb(cat)
  if _is_ok('cat'):
    cat_emb = self.cat_emb(cat)
    scat_emb = self.scat_emb(sub_cat)
    _adds(
        [
            # prev_cat_emb,
            # prev_scat_emb,
            cat_emb,
            scat_emb,
        ],
        # ['cat', 'sub_cat', 'title_entity_types', 'abstract_entity_types', 'title_entities', 'abstract_entities']
        [
            # 'prev_cat', 'prev_scat',
            'cat',
            'sub_cat'
        ])

  if _is_ok('enti'):
    title_entities = self.entities_encoder(
        tf.concat([title_entities, title_entity_types], -1))
    abstract_entities = self.entities_encoder(
        tf.concat([abstract_entities, abstract_entity_types], -1))
    _adds(
        [
            # self.pooling(self.entity_emb(title_entities), mt.length(doc['title_entities'])),
            # self.pooling(self.entity_type_emb(title_entity_types), mt.length(doc['title_entity_types'])),
            # self.pooling(self.entity_emb(abstract_entities), mt.length(doc['abstract_entities'])),
            # self.pooling(self.entity_type_emb(abstract_entity_types), mt.length(doc['abstract_entity_types'])),
            title_entities,
            abstract_entities
        ],
        ['title_entities', 'abstract_entities'])

    # _adds(
    #     [
    #         self.his_simple_pooling(self.entity_type_emb(input['history_title_entity_types']), mt.length(input['history_title_entity_types'])),
    #         self.his_simple_pooling(self.entity_type_emb(input['history_abstract_entity_types']), mt.length(input['history_abstract_entity_types']))
    #     ],
    #     ['history_title_entity_merge_types', 'history_abstract_entity_merge_types']
    # )

    input['history_title_entities'] = input[
        'history_title_entities'][:, :FLAGS.max_his_title_entities * FLAGS.max_lookup_history]
    input['history_title_entity_types'] = input[
        'history_title_entity_types'][:, :FLAGS.max_his_title_entities * FLAGS.max_lookup_history]
    input['history_abstract_entities'] = input[
        'history_abstract_entities'][:, :FLAGS.max_his_title_entities * FLAGS.max_lookup_history]
    input['history_abstract_entity_types'] = input[
        'history_abstract_entity_types'][:, :FLAGS.max_his_title_entities * FLAGS.max_lookup_history]

    _adds([
        self.his_entity_pooling(
            title_entities,
            (self.entity_emb(input['history_title_entities']) +
             self.entity_type_emb(input['history_title_entity_types'])),
            mt.length(input['history_title_entities'])),
        self.his_entity_pooling(
            abstract_entities,
            (self.entity_emb(input['history_abstract_entities']) +
             self.entity_type_emb(input['history_abstract_entity_types'])),
            mt.length(input['history_abstract_entities']))
    ], ['his_title_merge_entities', 'his_abstract_merge_entities'])

  # --------------- history info
  dids = input['ori_history']
  dids = dids[:, :FLAGS.max_lookup_history]
  hlen = mt.length(input['history'])
  hlen = tf.math.maximum(hlen, 1)
  his = mt.lookup_feats(dids, self.doc_lookup, doc_feats, doc_feat_lens)
  his_cats = his['cat']
  his_cats = tf.squeeze(his_cats, -1)
  his_sub_cats = his['sub_cat']
  his_sub_cats = tf.squeeze(his_sub_cats, -1)

  # his_title_entities = his['title_entities']
  # his_title_entity_types = his['title_entity_types']
  # his_abstract_entities = his['abstract_entities']
  # his_abstract_entity_types = his['abstract_entity_types']
  # his_title_entities = self.his_entities_encoder(tf.concat([his_title_entities, his_title_entity_types], -1),
  #                                                tf.math.minimum(hlen, FLAGS.max_titles), title_entities)
  # his_abstract_entities = self.his_entities_encoder(tf.concat([his_abstract_entities, his_abstract_entity_types], -1),
  #                                                   tf.math.minimum(hlen, FLAGS.max_titles), abstract_entities)

  if _is_ok('cat'):
    # FIXME: if flattened directly, mt.length is problematic here because entries are
    # internally 0-padded, e.g. 2,3,0,0 1,0,0,0, so a lot of information is dropped.
    # Padding with 1 is one option (v1 does that, at most 1,1); an encoder could also be used.
    _adds(
        [
            self.his_cat_pooling(self.cat_emb(his_cats), mt.length(his_cats)),
            self.his_cat_pooling(self.scat_emb(his_sub_cats), mt.length(his_sub_cats)),
            ## For cat, din pooling is no better than att (adding it gives no gain);
            ## for title, din beats att, and for entity din is also better.
            # self.his_scat_din_pooling(scat_emb, self.scat_emb(his_sub_cats), mt.length(his_sub_cats)),
            # his_title_entities,
            # his_abstract_entities,
        ],
        [
            'his_cats',
            'his_sub_cats',
            # 'history_title_entities', 'history_abstract_entities'
        ])

  if not FLAGS.bert_dir or not FLAGS.bert_only:
    if _is_ok('^cur_title&'):
      cur_title = self.title_encoder(doc['title'])
      his_titles = his['title']
      if FLAGS.max_titles:
        his_titles = his_titles[:, :FLAGS.max_titles]
      his_title = self.titles_encoder(his_titles,
                                      tf.math.minimum(hlen, FLAGS.max_titles),
                                      cur_title)
      _adds([cur_title, his_title], ['cur_title', 'his_title'])

    if _is_ok('^abstract&'):
      cur_abstract = self.abstract_encoder(doc['abstract'])
      his_abstracts = his['abstract']
      if FLAGS.max_abstracts:
        his_abstracts = his_abstracts[:, :FLAGS.max_abstracts]
      his_abstract = self.abstracts_encoder(
          his_abstracts, tf.math.minimum(hlen, FLAGS.max_abstracts), cur_abstract)
      _adds([cur_abstract, his_abstract], ['cur_abstract', 'his_abstract'])

    if FLAGS.use_body:
      if _is_ok('^body&'):
        cur_body = self.body_encoder(doc['body'])
        his_bodies = his['body']
        if FLAGS.max_bodies:
          his_bodies = his_bodies[:, :FLAGS.max_bodies]
        his_body = self.bodies_encoder(his_bodies,
                                       tf.math.minimum(hlen, FLAGS.max_bodies),
                                       cur_body)
        _adds([cur_body, his_body], ['cur_body', 'his_body'])

  if FLAGS.bert_dir:
    if _is_ok('bert_title'):
      bert_title = self.bert_title_encoder(doc['title_uncased'])
      max_titles = FLAGS.max_bert_titles
      his_bert_title = self.bert_titles_encoder(
          his['title_uncased'][:, :max_titles],
          tf.math.minimum(hlen, max_titles), bert_title)
      _adds([bert_title, his_bert_title], ['bert_title', 'his_bert_title'])

    if _is_ok('bert_abstract') and FLAGS.bert_abstract:
      bert_abstract = self.bert_abstract_encoder(doc['abstract_uncased'])
      max_abstracts = FLAGS.max_bert_abstracts
      his_bert_abstract = self.bert_abstracts_encoder(
          his['abstract_uncased'][:, :max_abstracts],
          tf.math.minimum(hlen, max_abstracts), bert_abstract)
      _adds([bert_abstract, his_bert_abstract], ['bert_abstract', 'his_bert_abstract'])

    if _is_ok('bert_body') and FLAGS.bert_body:
      bert_body = self.bert_body_encoder(doc['body_uncased'])
      max_bodies = FLAGS.max_bert_bodies
      his_bert_body = self.bert_bodies_encoder(
          his['body_uncased'][:, :max_bodies],
          tf.math.minimum(hlen, max_bodies), bert_body)
      _adds([bert_body, his_bert_body], ['bert_body', 'his_bert_body'])

  if FLAGS.use_impression_titles:
    # +0.4% on dev but drops on test
    his_impression = mt.lookup_feats(input['impressions'], self.doc_lookup,
                                     doc_feats, doc_feat_lens)
    his_impression_titles = his_impression['title']
    his_impression_title = self.titles_encoder2(his_impression_titles,
                                                mt.length(input['impressions']),
                                                cur_title)
    _adds([his_impression_title], ['impression_title'])

  # Using impression ids directly makes dev and test inconsistent, so the id itself is not used.
  if FLAGS.use_impressions:
    _add(self.mean_pooling(self.demb(input['impressions'])), 'impressions')

  if FLAGS.use_dense:
    dense_emb = self.deal_dense(input)
    _add(dense_emb, 'dense')

  embs = self.embs
  # logging.info('-----------embs:', len(embs))
  logging.info(self.feats.keys())
  # logging.debug(self.feats)

  embs = [x if len(mt.get_shape(x)) == 2 else tf.squeeze(x, 1) for x in embs]
  embs = tf.stack(embs, axis=1)

  if FLAGS.batch_norm:
    embs = self.batch_norm(embs)

  if FLAGS.l2_normalize_before_pooling:
    embs = tf.math.l2_normalize(embs, axis=FLAGS.l2_norm_axis)

  x = self.feat_pooling(embs)

  # if FLAGS.dropout:
  #   x = self.dropout(x)

  if FLAGS.use_dense:
    x = tf.concat([x, dense_emb], axis=1)

  # if FLAGS.use_his_concat:
  #   x = tf.concat([x, his_concat], axis=1)

  x = self.mlp(x)

  self.logit = self.dense(x)
  self.prob = tf.math.sigmoid(self.logit)
  self.impression_id = input['impression_id']
  self.position = input['position']
  self.history_len = input['hist_len']
  self.impression_len = input['impression_len']
  self.input_ = input
  return self.logit
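# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original code): the tail of call() above follows
# a common CTR ranking pattern -- stack the per-feature embeddings to
# [batch, num_feats, dim], pool across the feature axis, concatenate the dense
# branch, then MLP -> logit -> sigmoid. This standalone version uses plain keras
# layers, sum pooling as a stand-in for the configurable feat_pooling, and the
# non-big_mlp dims from __init__; all other names are invented for illustration.
def _ranking_head_demo(feature_embs, dense_feats):
  """feature_embs: list of [batch, dim] tensors; dense_feats: [batch, d] tensor."""
  import tensorflow as tf
  embs = tf.stack(feature_embs, axis=1)    # [batch, num_feats, dim]
  x = tf.reduce_sum(embs, axis=1)          # stand-in for feat_pooling
  x = tf.concat([x, dense_feats], axis=1)  # append the dense branch
  for units in [512, 256, 64]:             # mirrors the non-big_mlp dims
    x = tf.keras.layers.Dense(units, activation='relu')(x)
  logit = tf.keras.layers.Dense(1)(x)
  return tf.math.sigmoid(logit)
# ---------------------------------------------------------------------------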
def __init__(self, **kwargs):
  super(Model, self).__init__(**kwargs)
  self.mode = 'train'
  self.input_ = {}

  def _emb(vocab_name, emb_name=None):
    return util.create_emb(vocab_name, emb_name)

  self.uemb = _emb('uid')
  self.demb = _emb('did')
  self.cat_emb = _emb('cat')
  self.scat_emb = _emb('sub_cat')
  self.entity_emb = _emb('entity')
  self.entity_type_emb = _emb('entity_type')
  if not FLAGS.bert_dir or not FLAGS.bert_only:
    self.word_emb = _emb('word')

  self.hour_emb = Embedding(24, FLAGS.emb_size, name='hour_emb')
  self.weekday_emb = Embedding(7, FLAGS.emb_size, name='weekday_emb')
  self.fresh_hour_emb = Embedding(300, FLAGS.emb_size, name='fresh_hour_emb')  # 7 * 24
  self.fresh_day_emb = Embedding(50, FLAGS.emb_size, name='fresh_day_emb')
  self.position_emb = Embedding(300, FLAGS.emb_size, name='position_emb')

  # self.title_lookup = mt.layers.LookupArray(FLAGS.title_lookup, name='title_lookup')
  self.doc_lookup = mt.layers.LookupArray(FLAGS.doc_lookup, name='doc_lookup')

  if _is_ok('enti'):
    self.entities_encoder = Encoders([self.entity_emb, self.entity_type_emb],
                                     None,
                                     FLAGS.pooling,
                                     name='entities_encoder')
    self.his_entities_encoder = SeqsEncoder(self.entities_encoder,
                                            None,
                                            FLAGS.seqs_pooling,
                                            name='his_entities_encoder')

  if not FLAGS.bert_dir or not FLAGS.bert_only:
    if _is_ok('^cur_title&') or _is_ok('abstract') or _is_ok('body'):
      if FLAGS.share_words_encoder:
        words_encoder = Encoder(self.word_emb,
                                FLAGS.seqs_encoder,
                                FLAGS.pooling,
                                name='words_encoder')
      else:
        words_encoder = None

    if _is_ok('^cur_title&'):
      self.title_encoder = words_encoder or Encoder(self.word_emb,
                                                    FLAGS.seqs_encoder,
                                                    FLAGS.pooling,
                                                    name='title_encoder')
      self.titles_encoder = SeqsEncoder(self.title_encoder,
                                        FLAGS.seqs_encoder,
                                        FLAGS.seqs_pooling,
                                        name='titles_encoder')
      self.titles_encoder2 = SeqsEncoder(self.title_encoder,
                                         FLAGS.seqs_encoder,
                                         FLAGS.seqs_pooling2,
                                         name='titles_encoder2')

    if _is_ok('^abstract&'):
      self.abstract_encoder = words_encoder or Encoder(self.word_emb,
                                                       FLAGS.seqs_encoder,
                                                       name='abstract_encoder')
      self.abstracts_encoder = SeqsEncoder(self.abstract_encoder,
                                           FLAGS.seqs_encoder,
                                           FLAGS.seqs_pooling,
                                           name='abstracts_encoder')

    if _is_ok('^body&'):
      self.body_encoder = words_encoder or Encoder(self.word_emb,
                                                   None,
                                                   FLAGS.pooling,
                                                   name='body_encoder')
      self.bodies_encoder = SeqsEncoder(self.body_encoder,
                                        FLAGS.seqs_encoder,
                                        FLAGS.seqs_pooling,
                                        name='bodies_encoder')

  if FLAGS.bert_dir:
    # On TPU a static check is performed and bert_encoder fails as having no weights:
    # "Weights for model bert_encoder have not yet been created. Weights are created
    #  when the Model is first called on inputs or `build()` is called with an `input_shape`."
    # tpu: max_input_length = None if not gezi.get('tpu') else FLAGS.max_bert_input_length
    max_input_length = None
    bert_encoder = mt.models.Bert(FLAGS.bert_dir,
                                  FLAGS.emb_size,
                                  max_input_length=max_input_length,
                                  return_sequences=FLAGS.bert_pooling_seqs,
                                  name='bert_encoder')
    self.bert_title_encoder = bert_encoder
    self.bert_abstract_encoder = bert_encoder
    self.bert_body_encoder = bert_encoder

    if FLAGS.bert_pooling_seqs:
      if FLAGS.share_words_encoder:
        bert_words_encoder = Encoder(None,
                                     bert_encoder,
                                     FLAGS.pooling,
                                     name='words_encoder')
      else:
        bert_words_encoder = None

      if _is_ok('bert_title'):
        self.bert_title_encoder = bert_words_encoder or Encoder(
            None, bert_encoder, FLAGS.pooling, name='bert_title_encoder')
      if _is_ok('bert_abstract'):
        self.bert_abstract_encoder = bert_words_encoder or Encoder(
            None, bert_encoder, FLAGS.pooling, name='bert_abstract_encoder')
      if _is_ok('bert_body'):
        self.bert_body_encoder = bert_words_encoder or Encoder(
            None, bert_encoder, FLAGS.pooling, name='bert_body_encoder')

    if _is_ok('bert_title'):
      self.bert_titles_encoder = SeqsEncoder(self.bert_title_encoder,
                                             FLAGS.seqs_encoder,
                                             FLAGS.seqs_pooling,
                                             name='bert_titles_encoder')
    if _is_ok('bert_abstract'):
      self.bert_abstracts_encoder = SeqsEncoder(self.bert_abstract_encoder,
                                                FLAGS.seqs_encoder,
                                                FLAGS.seqs_pooling,
                                                name='bert_abstracts_encoder')
    if _is_ok('bert_body'):
      self.bert_bodies_encoder = SeqsEncoder(self.bert_body_encoder,
                                             FLAGS.seqs_encoder,
                                             FLAGS.seqs_pooling,
                                             name='bert_bodies_encoder')

  self.sum_pooling = mt.layers.SumPooling()
  self.mean_pooling = mt.layers.MeanPooling()
  self.pooling = mt.layers.Pooling(FLAGS.pooling)
  self.feat_pooling = mt.layers.Pooling(FLAGS.feat_pooling, name='feat_pooling')
  self.his_simple_pooling = mt.layers.Pooling(FLAGS.his_simple_pooling)
  # self.his_entity_pooling = mt.layers.Pooling('att', name='his_entity_pooling')
  self.his_entity_pooling = util.get_att_pooling('din', name='his_entity_pooling')
  self.his_cat_pooling = mt.layers.Pooling('att', name='his_cat_pooling')
  self.his_scat_din_pooling = util.get_att_pooling('din', name='his_scat_din_pooling')

  self.dense = Dense(1) if not FLAGS.use_multi_dropout else mt.layers.MultiDropout(
      1, drop_rate=0.3)
  self.batch_norm = BatchNormalization()
  self.dropout = keras.layers.Dropout(FLAGS.dropout)

  # --arch-mlp-bot="13-512-256-64-16" --arch-mlp-top="512-256-1"
  activation = FLAGS.activation
  mlp_dims = [FLAGS.emb_size * 2, FLAGS.emb_size] if not FLAGS.big_mlp else [
      FLAGS.emb_size * 4, FLAGS.emb_size * 2, FLAGS.emb_size
  ]
  self.dense_mlp = mt.layers.MLP(mlp_dims,
                                 activation=activation,
                                 drop_rate=FLAGS.mlp_dropout,
                                 name='dense_mlp')
  mlp_dims = [512, 256, 64] if not FLAGS.big_mlp else [1024, 512, 256]
  self.mlp = mt.layers.MLP(mlp_dims,
                           activation=activation,
                           drop_rate=FLAGS.mlp_dropout,
                           batch_norm=FLAGS.mlp_bn,
                           name='mlp')

  self.his_encoder = util.get_encoder(FLAGS.his_encoder)
  self.his_dense = keras.layers.Dense(FLAGS.hidden_size)
  self.his_pooling = util.get_att_pooling(FLAGS.his_pooling)
  self.his_pooling2 = util.get_att_pooling(FLAGS.his_pooling2)
  self.cur_dense = keras.layers.Dense(FLAGS.hidden_size)

  if FLAGS.his_strategy.startswith('bst'):
    self.transformer = mt.layers.transformer.Encoder(
        num_layers=1,
        d_model=FLAGS.hidden_size,
        num_heads=FLAGS.num_heads,
        dff=FLAGS.hidden_size,
        maximum_position_encoding=None,
        activation=FLAGS.transformer_activation,
        rate=FLAGS.transformer_dropout)

  self.fusion = mt.layers.SemanticFusion(drop_rate=0.1)

  if FLAGS.feat_pooling == 'cin':
    from deepctr.layers.interaction import CIN
    self.cin = CIN((128, 128,), 'relu', True, 0, 1024)
    self.feat_pooling = self.cin

  if FLAGS.aux_loss_rate or FLAGS.lm_target:
    vsize = gezi.get('vocab_sizes')['vid'][0]
    # hidden_size = FLAGS.hidden_size if FLAGS.his_encoder in ['lstm', 'gru'] else int(FLAGS.hidden_size / 2)
    hidden_size = int(FLAGS.hidden_size / 2)
    self.sampled_weight = self.add_weight(
        name='sampled_weight',
        shape=(vsize, hidden_size),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.sampled_bias = self.add_weight(
        name='sampled_bias',
        shape=(vsize,),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.softmax_loss_function = mt.seq2seq.gen_sampled_softmax_loss_function(
        5,
        vsize,
        weights=self.sampled_weight,
        biases=self.sampled_bias,
        log_uniform_sample=True,
        is_predict=False,
        sample_seed=1234)
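# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original code): assigning the same bert_encoder
# instance to bert_title_encoder / bert_abstract_encoder / bert_body_encoder above
# means the three text fields share one set of weights. The same idiom with a plain
# keras layer, shown for illustration only (layer names and sizes are invented).
def _shared_encoder_demo():
  import tensorflow as tf
  shared = tf.keras.layers.Dense(16, name='shared_text_encoder')  # stand-in for the BERT wrapper
  title = tf.keras.Input(shape=(8,), name='title')
  abstract = tf.keras.Input(shape=(8,), name='abstract')
  merged = tf.keras.layers.Concatenate()([shared(title), shared(abstract)])  # same weights reused
  model = tf.keras.Model([title, abstract], merged)
  # Only one Dense kernel/bias pair exists even though two inputs pass through it.
  print(len(model.trainable_weights))  # -> 2
  return model
# ---------------------------------------------------------------------------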