def _interaction_semantic_feature_layer(self, seq_input_left, seq_input_right,
                                        seq_len_left, seq_len_right,
                                        granularity="word"):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq_left = tf.nn.embedding_lookup(emb_matrix, seq_input_left)
    emb_seq_right = tf.nn.embedding_lookup(emb_matrix, seq_input_right)

    #### dropout
    random_seed = np.random.randint(10000000)
    emb_seq_left = word_dropout(emb_seq_left,
                                training=self.training,
                                dropout=self.params["embedding_dropout"],
                                seed=random_seed)
    random_seed = np.random.randint(10000000)
    emb_seq_right = word_dropout(emb_seq_right,
                                 training=self.training,
                                 dropout=self.params["embedding_dropout"],
                                 seed=random_seed)

    #### encode
    enc_seq_left = encode(emb_seq_left,
                          method=self.params["encode_method"],
                          params=self.params,
                          sequence_length=seq_len_left,
                          mask_zero=self.params["embedding_mask_zero"],
                          scope_name=self.model_name + "enc_seq_%s" % granularity,
                          reuse=False)
    enc_seq_right = encode(emb_seq_right,
                           method=self.params["encode_method"],
                           params=self.params,
                           sequence_length=seq_len_right,
                           mask_zero=self.params["embedding_mask_zero"],
                           scope_name=self.model_name + "enc_seq_%s" % granularity,
                           reuse=True)

    #### attend
    # cross-attention matrix between the two encoded sequences: [batchsize, s1, s2]
    att_mat = tf.einsum("abd,acd->abc", enc_seq_left, enc_seq_right)
    feature_dim = self.params["encode_dim"] + self.params["max_seq_len_%s" % granularity]
    att_seq_left = attend(enc_seq_left,
                          context=att_mat,
                          feature_dim=feature_dim,
                          method=self.params["attend_method"],
                          scope_name=self.model_name + "att_seq_%s" % granularity,
                          reuse=False)
    # swap only the two sequence axes (keep the batch axis) so the right
    # sequence attends over the left one
    att_seq_right = attend(enc_seq_right,
                           context=tf.transpose(att_mat, perm=[0, 2, 1]),
                           feature_dim=feature_dim,
                           method=self.params["attend_method"],
                           scope_name=self.model_name + "att_seq_%s" % granularity,
                           reuse=True)

    #### MLP nonlinear projection
    sem_seq_left = self._mlp_layer(att_seq_left,
                                   fc_type=self.params["fc_type"],
                                   hidden_units=self.params["fc_hidden_units"],
                                   dropouts=self.params["fc_dropouts"],
                                   scope_name=self.model_name + "sem_seq_%s" % granularity,
                                   reuse=False)
    sem_seq_right = self._mlp_layer(att_seq_right,
                                    fc_type=self.params["fc_type"],
                                    hidden_units=self.params["fc_hidden_units"],
                                    dropouts=self.params["fc_dropouts"],
                                    scope_name=self.model_name + "sem_seq_%s" % granularity,
                                    reuse=True)

    return emb_seq_left, enc_seq_left, att_seq_left, sem_seq_left, \
           emb_seq_right, enc_seq_right, att_seq_right, sem_seq_right
def _semantic_feature_layer(self, seq_input, seq_len, granularity="word", reuse=False):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

    #### dropout
    random_seed = np.random.randint(10000000)
    emb_seq = word_dropout(emb_seq,
                           training=self.training,
                           dropout=self.params["embedding_dropout"],
                           seed=random_seed)

    #### encode
    input_dim = self.params["embedding_dim"]
    enc_seq = encode(emb_seq,
                     method=self.params["encode_method"],
                     input_dim=input_dim,
                     params=self.params,
                     sequence_length=seq_len,
                     mask_zero=self.params["embedding_mask_zero"],
                     scope_name=self.model_name + "enc_seq_%s" % granularity,
                     reuse=reuse,
                     training=self.training)

    #### attend
    feature_dim = self.params["encode_dim"]
    context = None
    att_seq = attend(enc_seq,
                     context=context,
                     encode_dim=self.params["encode_dim"],
                     feature_dim=feature_dim,
                     attention_dim=self.params["attention_dim"],
                     method=self.params["attend_method"],
                     scope_name=self.model_name + "att_seq_%s" % granularity,
                     reuse=reuse,
                     num_heads=self.params["attention_num_heads"])

    #### MLP nonlinear projection
    sem_seq = mlp_layer(att_seq,
                        fc_type=self.params["fc_type"],
                        hidden_units=self.params["fc_hidden_units"],
                        dropouts=self.params["fc_dropouts"],
                        scope_name=self.model_name + "sem_seq_%s" % granularity,
                        reuse=reuse,
                        training=self.training,
                        seed=self.params["random_seed"])

    return emb_seq, enc_seq, att_seq, sem_seq
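# A typical siamese usage of the layer above (illustrative only; the input
# tensor names are assumptions, not part of this section): the left and right
# sequences share all weights, so the second call reuses the variable scopes
# created by the first.
#
#   emb_l, enc_l, att_l, sem_l = self._semantic_feature_layer(
#       seq_word_left, seq_len_word_left, granularity="word", reuse=False)
#   emb_r, enc_r, att_r, sem_r = self._semantic_feature_layer(
#       seq_word_right, seq_len_word_right, granularity="word", reuse=True)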
def _semantic_feature_layer(self, seq_input, granularity="word", reuse=False, return_enc=False):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

    #### dropout
    emb_seq = word_dropout(emb_seq,
                           training=self.training,
                           dropout=self.params["embedding_dropout"],
                           seed=self.params["random_seed"])

    #### encode
    enc_seq = encode(emb_seq,
                     method=self.params["encode_method"],
                     params=self.params,
                     scope_name=self.model_name + "enc_seq_%s" % granularity,
                     reuse=reuse)

    #### attend
    feature_dim = self.params["encode_dim"]
    context = None
    att_seq = attend(enc_seq,
                     context=context,
                     feature_dim=feature_dim,
                     method=self.params["attend_method"],
                     scope_name=self.model_name + "att_seq_%s" % granularity,
                     reuse=reuse)

    #### MLP nonlinear projection
    sem_seq = self._mlp_layer(att_seq,
                              fc_type=self.params["fc_type"],
                              hidden_units=self.params["fc_hidden_units"],
                              dropouts=self.params["fc_dropouts"],
                              scope_name=self.model_name + "sem_seq_%s" % granularity,
                              reuse=reuse)

    if return_enc:
        return sem_seq, enc_seq
    else:
        return sem_seq
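# The `word_dropout` helper used throughout these layers is defined elsewhere.
# Below is a minimal sketch of one common implementation, assuming it zeroes
# out entire token embeddings at rate `dropout` during training; the project's
# actual helper may differ in details (e.g. rescaling of kept tokens).
def word_dropout(x, training, dropout, seed=None):
    """Drop whole token vectors; x has shape [batch, seq_len, emb_dim]."""
    if dropout <= 0.:
        return x

    def _dropped():
        # sample one keep/drop decision per token, not per embedding dimension
        mask_shape = [tf.shape(x)[0], tf.shape(x)[1], 1]
        keep = tf.cast(
            tf.random_uniform(mask_shape, seed=seed) >= dropout, tf.float32)
        return x * keep

    return tf.cond(tf.cast(training, tf.bool), _dropped, lambda: x)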
def _build_task_graph(self, task_name):
    #### tf vars
    self.task_labels[task_name] = tf.placeholder(tf.int32, shape=[None], name="task_labels")
    self.labels[task_name] = tf.placeholder(tf.int32, shape=[None], name="labels")
    self.seq_word[task_name] = tf.placeholder(tf.int32, shape=[None, None], name="seq_word")

    #### embedding
    emb_seq = tf.nn.embedding_lookup(self.emb_matrix, self.seq_word[task_name])
    emb_seq = word_dropout(emb_seq,
                           training=self.training,
                           dropout=self.params["embedding_dropout"],
                           seed=self.params["random_seed"])

    #### features
    shared_features = self._shared_feature_extractor(emb_seq, seq_len=None)
    private_features = self._private_feature_extractor(emb_seq, seq_len=None, task_name=task_name)
    feature = tf.concat([shared_features, private_features], axis=1)
    feature = tf.layers.Dropout(self.params["fc_dropout"])(feature, training=self.training)

    #### task classifier
    # for mtl-dataset, label is 0/1 for all the tasks
    logits = tf.layers.dense(feature, 2)
    probas = tf.nn.softmax(logits)
    loss_task = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.labels[task_name], logits=logits)
    loss_task = tf.reduce_mean(loss_task)

    #### auxiliary losses
    loss_adv = self._adversarial_loss(shared_features, self.task_labels[task_name])
    loss_diff = self._difference_loss(shared_features, private_features)
    loss_domain = self._domain_loss(private_features, self.task_labels[task_name])

    #### overall loss
    loss = loss_task
    if "loss_adv_weight" in self.params and self.params["loss_adv_weight"] > 0:
        loss += self.params["loss_adv_weight"] * loss_adv
    if "loss_diff_weight" in self.params and self.params["loss_diff_weight"] > 0:
        loss += self.params["loss_diff_weight"] * loss_diff
    if "loss_domain_weight" in self.params and self.params["loss_domain_weight"] > 0:
        loss += self.params["loss_domain_weight"] * loss_domain
    if "loss_l2_lambda" in self.params and self.params["loss_l2_lambda"] > 0:
        l2_losses = tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if "bias" not in v.name
        ])
        loss += self.params["loss_l2_lambda"] * l2_losses

    #### accuracy
    preds = tf.cast(tf.argmax(logits, axis=1), tf.int32)
    acc = tf.cast(tf.equal(preds, self.labels[task_name]), tf.float32)
    acc = tf.reduce_mean(acc)

    return probas, loss, acc
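# `_adversarial_loss`, `_difference_loss` and `_domain_loss` are referenced in
# _build_task_graph but defined elsewhere. As an illustration only, the sketch
# below shows how an adversarial loss over the shared features is commonly
# built with a gradient-reversal layer and a task discriminator; the function
# names, layer sizes, and overall structure here are assumptions, not the
# project's actual implementation.
def flip_gradient(x, scale=1.0):
    """Identity in the forward pass; reverses (and scales) the gradient in backprop."""
    # forward value: (1 + scale) * x - scale * x == x
    # backward: gradient flows only through the -scale * x term
    return tf.stop_gradient((1. + scale) * x) - scale * x


def _adversarial_loss_sketch(shared_features, task_labels, num_tasks):
    """Train a task discriminator on shared features while the shared
    feature extractor (via the flipped gradient) learns to fool it."""
    feat = flip_gradient(shared_features)
    logits = tf.layers.dense(feat, num_tasks, name="task_discriminator")
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=task_labels, logits=logits)
    return tf.reduce_mean(loss)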