def _conv_opt(inputs, window_size, out_filters, separable=False, ch_mul=1,
              is_mask=True):
    """1-D convolution over a channels-first sequence tensor.

    Args:
        inputs: float tensor [batch, inp_d, inp_l] in NCW layout (channels
            first) — consistent with the 'NCW' data_format used below.
        window_size: convolution kernel width.
        out_filters: number of output channels.
        separable: if True, use a depthwise-separable convolution.
        ch_mul: depthwise channel multiplier (separable path only).
        is_mask: if True, zero out positions past each sequence's valid
            length (as computed by ``get_length``) both before and after
            the convolution.

    Returns:
        float tensor [batch, out_filters, inp_l].
    """
    inp_d = inputs.get_shape()[1].value
    inp_l = inputs.get_shape()[2].value
    if is_mask:
        # get_length expects [batch, len, dim]; transpose there and back.
        inputs = tf.transpose(inputs, [0, 2, 1])  # [batch, len, dim]
        valid_length = get_length(inputs)
        mask = tf.sequence_mask(valid_length, inp_l)  # [batch, len]
        mask = tf.expand_dims(mask, 1)                # [batch, 1, len]
        mask = tf.tile(mask, [1, inp_d, 1])           # [batch, dim, len]
        inputs = tf.transpose(inputs, [0, 2, 1])      # back to [batch, dim, len]
        inputs = tf.reshape(inputs, [-1, inp_d, inp_l])
        inputs = tf.where(mask, inputs, tf.zeros_like(inputs))
    if separable:
        # BUG FIX: tf.nn.separable_conv1d does not exist in the public TF
        # API; emulate it with separable_conv2d over a height-1 NCHW tensor.
        # The depthwise filter's input-channel dim is inp_d here — the
        # original code used out_filters, which only worked when
        # inp_d == out_filters.
        x4 = tf.expand_dims(inputs, axis=2)  # [batch, inp_d, 1, inp_l]
        w_depth = create_weight("w_depth", [1, window_size, inp_d, ch_mul])
        w_point = create_weight("w_point", [1, 1, inp_d * ch_mul, out_filters])
        out = tf.nn.separable_conv2d(x4, w_depth, w_point,
                                     strides=[1, 1, 1, 1], padding="SAME",
                                     data_format="NCHW")
        out = tf.squeeze(out, axis=2)  # back to [batch, out_filters, inp_l]
    else:
        w = create_weight("w", [window_size, inp_d, out_filters])
        out = tf.nn.conv1d(inputs, w, 1, "SAME", data_format='NCW')
    if is_mask:
        # Re-mask the output with the new channel count.
        mask = tf.sequence_mask(valid_length, inp_l)
        mask = tf.expand_dims(mask, 1)
        mask = tf.tile(mask, [1, out_filters, 1])
        out = tf.where(mask, out, tf.zeros_like(out))
    out = tf.reshape(out, [-1, out_filters, inp_l])
    return out
def _linear_combine(self, final_layers):
    """Combine candidate output layers by a learned softmax-weighted sum."""
    with tf.variable_scope("linear_combine"):
        # One scalar logit per layer, normalized across the layer axis.
        logits = create_weight("w", [len(final_layers), 1, 1, 1])
        weights = tf.nn.softmax(logits, axis=0)
        stacked = tf.convert_to_tensor(final_layers)
        print("final_layer_tensor: {0}".format(stacked))
        combined = tf.reduce_sum(tf.multiply(stacked, weights), axis=0)
        print("final_layer_tensor: {0}".format(combined))
        return combined
def pool_op(inputs, is_training, count, out_filters, avg_or_max, start_idx=None):
    """1x1 conv + BN + ReLU followed by a stride-1, width-3 pooling layer.

    Args:
        start_idx: where to start taking the output channels.
            if None, assuming fixed_arc mode
        count: how many output_channels to take.
    """
    in_channels = inputs.get_shape()[1].value
    with tf.variable_scope("conv_1"):
        # Project to out_filters channels before pooling.
        kernel = create_weight("w", [1, in_channels, out_filters])
        x = tf.nn.conv1d(inputs, kernel, 1, "SAME", data_format='NCW')
        x = tf.nn.relu(batch_norm(x, is_training))
    with tf.variable_scope("pool"):
        actual_data_format = "channels_first"
        pool_fns = {
            "avg": tf.layers.average_pooling1d,
            "max": tf.layers.max_pooling1d,
        }
        if avg_or_max not in pool_fns:
            raise ValueError("Unknown pool {}".format(avg_or_max))
        x = pool_fns[avg_or_max](x, 3, 1, "SAME",
                                 data_format=actual_data_format)
    if start_idx is not None:
        # Search mode: slice the requested channel window.
        x = x[:, start_idx:start_idx + count, :]
    return x
def _embedding(self, inputs, vocab_size, num_units, zero_pad=True, scale=True,
               scope="embedding", reuse=None):
    """Trainable embedding lookup.

    When ``zero_pad`` is set, row 0 of the table (the padding id) is forced
    to zeros; when ``scale`` is set, outputs are multiplied by
    sqrt(num_units) (standard Transformer embedding scaling).
    """
    with tf.variable_scope(scope, reuse=reuse):
        table = create_weight(
            "lookup_table",
            trainable=True,
            shape=[vocab_size, num_units],
            initializer=tf.contrib.layers.xavier_initializer())
        if zero_pad:
            zero_row = tf.zeros(shape=[1, num_units])
            table = tf.concat((zero_row, table[1:, :]), 0)
        outputs = tf.nn.embedding_lookup(table, inputs)
        if scale:
            outputs = outputs * (num_units ** 0.5)
        return outputs
def _model(self, doc, bow_doc, datasets, is_training, reuse=False, mode="train"):
    """Build the NNI search-space graph for text classification.

    NOTE(review): this variant relies on the ``@nni.mutable_layers``
    annotation strings below — names such as ``final_out`` and the
    ``layer_*_out`` tensors are injected by NNI's annotation compiler and
    are undefined if this file is executed without that preprocessing.

    Args:
        doc: int tensor of token ids.
        bow_doc: int tensor of field ids aligned with ``doc``.
        datasets: unused in this body — presumably kept for interface
            parity with callers; TODO confirm.
        is_training: Python bool enabling dropout and train-time paths.
        reuse: forwarded to the outer variable scope.
        mode: "train"/"valid"/other; with ``is_training`` selects the
            batch size.

    Returns:
        Logits tensor of shape [batch_size, class_num].
    """
    with tf.variable_scope(self.name, reuse=reuse):
        layers = []
        if is_training:
            self.valid_lengths = []
        with tf.variable_scope('embed'):
            regularizer = tf.contrib.layers.l2_regularizer(scale=self.l2_reg)
            # Token embedding table: random ("none"), pretrained
            # (glove/word2vec), or both pretrained tables concatenated
            # along the vocabulary axis ("all").
            if self.embedding_model == "none":
                embedding = create_weight(
                    "w",
                    shape=self.embedding["none"].shape,
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)
            elif self.embedding_model == "glove":
                embedding = create_weight(
                    "w",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["glove"],
                    regularizer=regularizer)
            elif self.embedding_model == "word2vec":
                embedding = create_weight(
                    "w",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["word2vec"],
                    regularizer=regularizer)
            elif self.embedding_model == "all":
                embedding_glove = create_weight(
                    "w_glove",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["glove"],
                    regularizer=regularizer)
                print("embedding_glove: {0}".format(
                    embedding_glove.get_shape()))
                embedding_word2vec = create_weight(
                    "w_word2vec",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["word2vec"],
                    regularizer=regularizer)
                print("embedding_word2vec: {0}".format(
                    embedding_word2vec.get_shape()))
                embedding = tf.concat(
                    [embedding_glove, embedding_word2vec], axis=0)
                print("join embedding: {0}".format(embedding.get_shape()))
            field_embedding = create_weight(
                "w_field",
                shape=self.embedding["field"].shape,
                trainable=True,
                initializer=tf.truncated_normal_initializer,
                regularizer=regularizer)
            self.final_embedding = embedding
        print("embedding: {0}".format(embedding))
        print("doc: {0}".format(doc))
        print("bow_doc: {0}".format(bow_doc))
        if is_training or mode == "valid":
            batch_size = self.batch_size
        else:
            batch_size = self.eval_batch_size
        if self.sliding_window:
            # Split long documents into overlapping 64-token windows.
            doc, sliding_windows = self._to_sliding_window(doc, batch_size,
                                                           size=64, step=32)
            bow_doc, _ = self._to_sliding_window(bow_doc, batch_size,
                                                 size=64, step=32)
            print("doc after sliding window: {0}".format(doc))
        if is_training:
            # Dropout on the embedding table itself (not the lookups).
            embedding = tf.nn.dropout(embedding,
                                      keep_prob=self.embed_keep_prob)
        doc = tf.nn.embedding_lookup(embedding, doc, max_norm=None)
        field_embedding = tf.nn.embedding_lookup(field_embedding, bow_doc,
                                                 max_norm=None)
        if self.input_field_embedding:
            doc = tf.add_n([doc, field_embedding])
        # Channels-first layout from here on: [batch, dim, len].
        doc = tf.transpose(doc, [0, 2, 1])
        print("doc_shape", doc.shape)
        inp_c = doc.shape[1]
        inp_w = doc.shape[2]
        #doc = tf.reshape(doc, [-1, inp_c, 1, inp_w])
        doc = tf.reshape(doc, [-1, inp_c, inp_w])
        field_embedding = tf.transpose(field_embedding, [0, 2, 1])
        #field_embedding = tf.reshape(field_embedding, [-1, inp_c, 1, inp_w])
        field_embedding = tf.reshape(field_embedding, [-1, inp_c, inp_w])
        print("after: doc, field_embedding", doc.shape, field_embedding.shape)
        x = doc
        pos_batch_size = 1
        # initialize pos_embedding for transformer
        if self.input_positional_encoding:
            # presumably 300 matches the pretrained embedding width —
            # TODO confirm.
            out_filters = 300
        else:
            out_filters = self.out_filters
        if self.is_sinusolid:
            # Fixed sinusoidal positional encoding.
            pos_embedding = self._positional_encoding(x, pos_batch_size,
                                                      is_training,
                                                      num_units=out_filters,
                                                      zero_pad=False,
                                                      scale=False,
                                                      scope="enc_pe")
        else:
            # Learned positional embedding over positions [0, inp_w).
            pos_embedding = self._embedding(
                tf.tile(tf.expand_dims(tf.range(inp_w), 0),
                        [pos_batch_size, 1]),
                vocab_size=inp_w,
                num_units=out_filters,
                reuse=tf.AUTO_REUSE,
                zero_pad=True,
                scale=False,
                scope="enc_pe")
        print("pos embedding: {0}".format(pos_embedding))
        pos_embedding = tf.transpose(pos_embedding, [0, 2, 1])
        #pos_embedding = tf.expand_dims(pos_embedding, axis=2)
        print("pos embedding: {0}".format(pos_embedding))
        if self.input_positional_encoding:
            x += pos_embedding
            out_filters = self.out_filters
        with tf.variable_scope("init_conv"):
            # adjust out_filter dimension
            #print("init_x", x.shape)
            x = _conv_opt(x, 1, self.out_filters)
            x = batch_norm(x, is_training)
        layers.append(x)

        # sveral operations for nni
        def add_fixed_pooling_layer(layer_id, layers, out_filters,
                                    is_training, pos_embedding,
                                    field_embedding):
            '''Add a fixed pooling layer every four layers'''
            # Halve the length of pos/field embeddings and every layer
            # output so later ops stay shape-compatible.
            with tf.variable_scope("pos_embed_pool_{0}".format(layer_id)):
                pos_embedding = self._factorized_reduction(
                    pos_embedding, out_filters, 2, is_training)
            with tf.variable_scope(
                    "field_embed_pool_{0}".format(layer_id)):
                field_embedding = self._factorized_reduction(
                    field_embedding, out_filters, 2, is_training)
            #out_filters *= 2
            with tf.variable_scope("pool_at_{0}".format(layer_id)):
                pooled_layers = []
                for i, layer in enumerate(layers):
                    #print("pooling_layer", i, layer)
                    with tf.variable_scope("from_{0}".format(i)):
                        x = self._factorized_reduction(
                            layer, out_filters, 2, is_training)
                    #print("after x ", x)
                    pooled_layers.append(x)
                layers = pooled_layers
            return layers, out_filters

        def post_process_out(inputs, out):
            '''Form skip connection and perform batch norm'''
            optional_inputs = inputs[1]
            print("post_process_out::", inputs, optional_inputs)
            with tf.variable_scope(get_layer_id()):
                with tf.variable_scope("skip"):
                    #print("layers",layers)
                    inputs = layers[-1]
                    inp_d = inputs.get_shape()[1].value
                    inp_l = inputs.get_shape()[2].value
                    out.set_shape([None, out_filters, inp_l])
                    # NOTE(review): best-effort skip-add — any failure
                    # (e.g. shape mismatch) is swallowed and only printed.
                    try:
                        out = tf.add_n(
                            [out, tf.reduce_sum(optional_inputs, axis=0)])
                    except Exception as e:
                        print(e)
                    out = batch_norm(out, is_training)
            layers.append(out)
            return out

        # Manual layer-id counter shared by the candidate-op builders below.
        global layer_id
        layer_id = -1

        def get_layer_id():
            global layer_id
            layer_id += 1
            return 'layer_' + str(layer_id)

        size = [1, 3, 5, 7]
        separables = [False, False, False, False]

        def conv(inputs, size, separable=False):
            # res_layers is pre_layers that are chosen to form skip connection
            # layers[-1] is always the latest input
            with tf.variable_scope(get_layer_id()):
                with tf.variable_scope('conv_' + str(size) + (
                        '_separable' if separable else '')):
                    #print("conv_inputs::", inputs)
                    dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                    #print("dealed_inputs::", dealed_inputs)
                    out = conv_op(dealed_inputs, size, is_training,
                                  out_filters, out_filters, start_idx=None,
                                  separable=separable)
                    #layers.append(out)
            return out

        def pool(inputs, ptype):
            assert ptype in ['avg', 'max'], "pooling type must be avg or max"
            with tf.variable_scope(get_layer_id()):
                with tf.variable_scope('pooling_' + str(ptype)):
                    #print("pool_inputs::", inputs)
                    dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                    #print("dealed_inputs::", dealed_inputs)
                    out = pool_op(dealed_inputs, is_training, out_filters,
                                  out_filters, ptype, start_idx=None)
                    #layers.append(out)
            return out

        def rnn(inputs):
            with tf.variable_scope(get_layer_id()):
                with tf.variable_scope('branch_6'):
                    #print("rnn_inputs::", inputs)
                    dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                    #print("dealed_inputs::", dealed_inputs)
                    out = recur_op(
                        dealed_inputs, is_training, out_filters, out_filters,
                        start_idx=0,
                        lstm_x_keep_prob=self.lstm_x_keep_prob,
                        lstm_h_keep_prob=self.lstm_h_keep_prob,
                        lstm_o_keep_prob=self.lstm_o_keep_prob,
                        var_rec=self.var_rec)
                    #layers.append(out)
            return out

        def attention(inputs):
            with tf.variable_scope(get_layer_id()):
                with tf.variable_scope('branch_7'):
                    #print("attention_inputs::", inputs)
                    dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                    #print("dealed_inputs::", dealed_inputs)
                    out = attention_op(
                        dealed_inputs, pos_embedding, field_embedding,
                        is_training, out_filters, out_filters, start_idx=0,
                        positional_encoding=self.positional_encoding,
                        attention_keep_prob=self.attention_keep_prob,
                        do_field_embedding=self.field_embedding)
                    #layers.append(out)
            return out

        def final_process(inputs):
            with tf.variable_scope(get_layer_id()):
                with tf.variable_scope('final_out'):
                    print("final_inputs::", inputs)
                    dealed_inputs = tf.reduce_mean(inputs[1], axis=0)
                    print("dealed_inputs::", dealed_inputs)
                    out = dealed_inputs
                    #out = tf.reduce_mean(inputs[1], axis=0)
                    print("final_out::", inputs, out)
            layers.append(out)
            return out

        # NNI annotation: searched layers 0-3 (each followed by a
        # post_process_out skip/batch-norm step).
        """@nni.mutable_layers(
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x],
            optional_input_size: 1,
            layer_output: layer_0_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_0_out_0)],
            optional_inputs: [],
            optional_input_size: 1,
            layer_output: layer_0_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out],
            optional_input_size: 1,
            layer_output: layer_1_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_1_out_0)],
            optional_inputs: [layer_0_out],
            optional_input_size: 1,
            layer_output: layer_1_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out],
            optional_input_size: 1,
            layer_output: layer_2_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_2_out_0)],
            optional_inputs: [layer_0_out, layer_1_out],
            optional_input_size: 1,
            layer_output: layer_2_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out],
            optional_input_size: 1,
            layer_output: layer_3_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_3_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out],
            optional_input_size: 1,
            layer_output: layer_3_out
        }
        )"""
        layers, out_filters = add_fixed_pooling_layer(
            3, layers, out_filters, is_training, pos_embedding,
            field_embedding)
        x, layer_0_out, layer_1_out, layer_2_out, layer_3_out = layers[-5:]
        print("layer_out", x, layer_0_out, layer_1_out, layer_2_out,
              layer_3_out)
        # NNI annotation: searched layers 4-7.
        """@nni.mutable_layers(
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out],
            optional_input_size: 1,
            layer_output: layer_4_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_4_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out],
            optional_input_size: 1,
            layer_output: layer_4_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
            optional_input_size: 1,
            layer_output: layer_5_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_5_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
            optional_input_size: 1,
            layer_output: layer_5_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
            optional_input_size: 1,
            layer_output: layer_6_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_6_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
            optional_input_size: 1,
            layer_output: layer_6_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
            optional_input_size: 1,
            layer_output: layer_7_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_7_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
            optional_input_size: 1,
            layer_output: layer_7_out
        }
        )"""
        layers, out_filters = add_fixed_pooling_layer(
            7, layers, out_filters, is_training, pos_embedding,
            field_embedding)
        x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out = layers[
            -9:]
        # NNI annotation: searched layers 8-11 plus the final_process step
        # that defines ``final_out``.
        """@nni.mutable_layers(
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
            optional_input_size: 1,
            layer_output: layer_8_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_8_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
            optional_input_size: 1,
            layer_output: layer_8_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
            optional_input_size: 1,
            layer_output: layer_9_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_9_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
            optional_input_size: 1,
            layer_output: layer_9_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
            optional_input_size: 1,
            layer_output: layer_10_out_0
        },
        {
            layer_choice: [post_process_out(out=layer_10_out_0)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
            optional_input_size: 1,
            layer_output: layer_10_out
        },
        {
            layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
            optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
            optional_input_size: 1,
            layer_output: layer_11_out_1
        },
        {
            layer_choice: [post_process_out(out=layer_11_out_1)],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
            optional_input_size: 1,
            layer_output: layer_11_out
        },
        {
            layer_choice: [final_process()],
            optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out, layer_11_out],
            optional_input_size: 1,
            layer_output: final_out
        }
        )"""
        print("len_layers: ", len(layers))
        # ``final_out`` is defined by the NNI annotation expansion above.
        x = final_out
        if self.sliding_window:
            x = self._from_sliding_window(x, batch_size, sliding_windows)
        class_num = self.class_num
        with tf.variable_scope("fc"):
            if not self.is_output_attention:
                # Global average pooling over the length axis.
                x = tf.reduce_mean(x, 2)
            else:
                batch_size = x.get_shape()[0].value
                inp_d = x.get_shape()[1].value
                inp_l = x.get_shape()[2].value
                # Learned query attended against the sequence positions.
                final_attention_query = create_weight(
                    "query",
                    shape=[1, inp_d],
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)
                if is_training or mode == "valid":
                    batch_size = self.batch_size
                else:
                    batch_size = self.eval_batch_size
                final_attention_query = tf.tile(final_attention_query,
                                                [batch_size, 1])
                print("final_attention_query: {0}".format(
                    final_attention_query))
                # put channel to the last dim
                x = tf.transpose(x, [0, 2, 1])
                x = tf.reshape(x, [-1, inp_l, inp_d])
                print("x: {0}".format(x))
                x = multihead_attention(
                    queries=final_attention_query,
                    keys=x,
                    pos_embedding=pos_embedding,
                    field_embedding=field_embedding,
                    num_units=inp_d,
                    num_heads=8,
                    dropout_rate=0,
                    is_training=is_training,
                    causality=False,
                    positional_encoding=self.positional_encoding)
                print("x: {0}".format(x))
                x = tf.reshape(x, [-1, 1, inp_d])
                x = tf.reduce_sum(x, axis=1)
                print("x: {0}".format(x))
        if is_training:
            x = tf.nn.dropout(x, self.keep_prob)
        x = tf.layers.dense(x, units=class_num)
        return x
def _model(self, doc, bow_doc, datasets, is_training, reuse=False, mode="train"):
    """Build the fixed-architecture graph for text classification.

    Unlike the annotation-driven variant, this body stacks
    ``self.num_layers`` layers via ``self._fixed_layer`` (driven by
    ``self.sample_arc`` / ``self.fixed_arc``) and inserts a factorized
    reduction after each layer id listed in ``self.pool_layers``.

    Args:
        doc: int tensor of token ids.
        bow_doc: int tensor of field ids aligned with ``doc``.
        datasets: unused in this body — presumably kept for interface
            parity with callers; TODO confirm.
        is_training: Python bool enabling dropout and train-time paths.
        reuse: forwarded to the outer variable scope.
        mode: "train"/"valid"/other; with ``is_training`` selects the
            batch size.

    Returns:
        Logits tensor of shape [batch_size, class_num].
    """
    with tf.variable_scope(self.name, reuse=reuse):
        layers = []
        # Per-layer flags marking which layer outputs feed the final sum.
        final_flags = []
        if is_training:
            self.valid_lengths = []
        with tf.variable_scope('embed'):
            regularizer = tf.contrib.layers.l2_regularizer(scale=self.l2_reg)
            # Token embedding table: random ("none"), pretrained
            # (glove/word2vec), or both pretrained tables concatenated
            # along the vocabulary axis ("all").
            if self.embedding_model == "none":
                embedding = create_weight(
                    "w",
                    shape=self.embedding["none"].shape,
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)
            elif self.embedding_model == "glove":
                embedding = create_weight(
                    "w",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["glove"],
                    regularizer=regularizer)
            elif self.embedding_model == "word2vec":
                embedding = create_weight(
                    "w",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["word2vec"],
                    regularizer=regularizer)
            elif self.embedding_model == "all":
                embedding_glove = create_weight(
                    "w_glove",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["glove"],
                    regularizer=regularizer)
                print("embedding_glove: {0}".format(
                    embedding_glove.get_shape()))
                embedding_word2vec = create_weight(
                    "w_word2vec",
                    shape=None,
                    trainable=True,
                    initializer=self.embedding["word2vec"],
                    regularizer=regularizer)
                print("embedding_word2vec: {0}".format(
                    embedding_word2vec.get_shape()))
                embedding = tf.concat(
                    [embedding_glove, embedding_word2vec], axis=0)
                print("join embedding: {0}".format(embedding.get_shape()))
            field_embedding = create_weight(
                "w_field",
                shape=self.embedding["field"].shape,
                trainable=True,
                initializer=tf.truncated_normal_initializer,
                regularizer=regularizer)
            self.final_embedding = embedding
        print("embedding: {0}".format(embedding))
        print("doc: {0}".format(doc))
        print("bow_doc: {0}".format(bow_doc))
        if is_training or mode == "valid":
            batch_size = self.batch_size
        else:
            batch_size = self.eval_batch_size
        if self.sliding_window:
            # Split long documents into overlapping 64-token windows.
            doc, sliding_windows = self._to_sliding_window(doc, batch_size,
                                                           size=64, step=32)
            bow_doc, _ = self._to_sliding_window(bow_doc, batch_size,
                                                 size=64, step=32)
            print("doc after sliding window: {0}".format(doc))
        if is_training:
            # Dropout on the embedding table itself (not the lookups).
            embedding = tf.nn.dropout(embedding,
                                      keep_prob=self.embed_keep_prob)
        doc = tf.nn.embedding_lookup(embedding, doc, max_norm=None)
        field_embedding = tf.nn.embedding_lookup(field_embedding, bow_doc,
                                                 max_norm=None)
        if self.input_field_embedding:
            doc = tf.add_n([doc, field_embedding])
        # Channels-first layout from here on: [batch, dim, len].
        doc = tf.transpose(doc, [0, 2, 1])
        print("doc_shape", doc.shape)
        inp_c = doc.shape[1]
        inp_w = doc.shape[2]
        #doc = tf.reshape(doc, [-1, inp_c, 1, inp_w])
        doc = tf.reshape(doc, [-1, inp_c, inp_w])
        field_embedding = tf.transpose(field_embedding, [0, 2, 1])
        #field_embedding = tf.reshape(field_embedding, [-1, inp_c, 1, inp_w])
        field_embedding = tf.reshape(field_embedding, [-1, inp_c, inp_w])
        print("after: doc, field_embedding", doc.shape, field_embedding.shape)
        x = doc
        pos_batch_size = 1
        # initialize pos_embedding for transformer
        if self.input_positional_encoding:
            # presumably 300 matches the pretrained embedding width —
            # TODO confirm.
            out_filters = 300
        else:
            out_filters = self.out_filters
        if self.is_sinusolid:
            # Fixed sinusoidal positional encoding.
            pos_embedding = self._positional_encoding(x, pos_batch_size,
                                                      is_training,
                                                      num_units=out_filters,
                                                      zero_pad=False,
                                                      scale=False,
                                                      scope="enc_pe")
        else:
            # Learned positional embedding over positions [0, inp_w).
            pos_embedding = self._embedding(
                tf.tile(tf.expand_dims(tf.range(inp_w), 0),
                        [pos_batch_size, 1]),
                vocab_size=inp_w,
                num_units=out_filters,
                reuse=tf.AUTO_REUSE,
                zero_pad=True,
                scale=False,
                scope="enc_pe")
        print("pos embedding: {0}".format(pos_embedding))
        pos_embedding = tf.transpose(pos_embedding, [0, 2, 1])
        #pos_embedding = tf.expand_dims(pos_embedding, axis=2)
        print("pos embedding: {0}".format(pos_embedding))
        if self.input_positional_encoding:
            x += pos_embedding
            out_filters = self.out_filters
        with tf.variable_scope("init_conv"):
            # adjust out_filter dimension
            #print("init_x", x.shape)
            x = _conv_opt(x, 1, self.out_filters)
            x = batch_norm(x, is_training)

        def add_fixed_pooling_layer(layer_id, layers, out_filters,
                                    is_training, pos_embedding,
                                    field_embedding):
            '''Add a fixed pooling layer every four layers'''
            # Halve the length of pos/field embeddings and every layer
            # output so later ops stay shape-compatible.
            with tf.variable_scope("pos_embed_pool_{0}".format(layer_id)):
                pos_embedding = self._factorized_reduction(
                    pos_embedding, out_filters, 2, is_training)
            with tf.variable_scope(
                    "field_embed_pool_{0}".format(layer_id)):
                field_embedding = self._factorized_reduction(
                    field_embedding, out_filters, 2, is_training)
            #out_filters *= 2
            with tf.variable_scope("pool_at_{0}".format(layer_id)):
                pooled_layers = []
                for i, layer in enumerate(layers):
                    #print("pooling_layer", i, layer)
                    with tf.variable_scope("from_{0}".format(i)):
                        x = self._factorized_reduction(
                            layer, out_filters, 2, is_training)
                    #print("after x ", x)
                    pooled_layers.append(x)
                layers = pooled_layers
            return layers, out_filters

        # start_idx walks self.sample_arc; each layer consumes 1 + layer_id
        # entries (op choice + skip flags), plus one more in multi-path mode.
        start_idx = 0
        print("xxxxx", x)
        for layer_id in range(self.num_layers):
            with tf.variable_scope("layer_{0}".format(layer_id)):
                print("layers", layers)
                print("layer_id, x", layer_id, x)
                x = self._fixed_layer(x, pos_embedding, field_embedding,
                                      layer_id, layers, final_flags,
                                      start_idx, 0, out_filters, is_training)
                layers.append(x)
                if self.fixed_arc is not None:
                    final_flags.append(1)
                print("sample_arc: {0}".format(self.sample_arc[start_idx]))
                if layer_id in self.pool_layers:
                    layers, out_filters = add_fixed_pooling_layer(
                        layer_id, layers, out_filters, is_training,
                        pos_embedding, field_embedding)
            start_idx += 1 + layer_id
            if self.multi_path:
                start_idx += 1
            print(layers[-1])
        print("all_layers:", layers)
        # Select which layer outputs contribute to the final representation.
        final_layers = []
        final_layers_idx = []
        for i in range(0, len(layers)):
            if self.all_layer_output:
                if self.num_last_layer_output == 0:
                    # Use every layer.
                    final_layers.append(layers[i])
                    final_layers_idx.append(i)
                elif i >= max(
                        (len(layers) - self.num_last_layer_output), 0):
                    # Use only the last num_last_layer_output layers.
                    final_layers.append(layers[i])
                    final_layers_idx.append(i)
            elif self.fixed_arc is not None and final_flags[i] == 1:
                final_layers.append(layers[i])
                final_layers_idx.append(i)
            elif self.fixed_arc is None:
                # Search mode: final_flags act as 0/1 multipliers.
                final_layers.append(final_flags[i] * layers[i])
        if self.fixed_arc is not None:
            print("final_layers: {0}".format(' '.join(
                [str(idx) for idx in final_layers_idx])))
        if self.fixed_arc is not None and self.output_linear_combine:
            x = self._linear_combine(final_layers)
        else:
            x = tf.add_n(final_layers)
        if self.sliding_window:
            x = self._from_sliding_window(x, batch_size, sliding_windows)
        class_num = self.class_num
        with tf.variable_scope("fc"):
            if not self.is_output_attention:
                # Global average pooling over the length axis.
                x = tf.reduce_mean(x, 2)
            else:
                batch_size = x.get_shape()[0].value
                inp_d = x.get_shape()[1].value
                inp_l = x.get_shape()[2].value
                # Learned query attended against the sequence positions.
                final_attention_query = create_weight(
                    "query",
                    shape=[1, inp_d],
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)
                if is_training or mode == "valid":
                    batch_size = self.batch_size
                else:
                    batch_size = self.eval_batch_size
                final_attention_query = tf.tile(final_attention_query,
                                                [batch_size, 1])
                print("final_attention_query: {0}".format(
                    final_attention_query))
                # put channel to the last dim
                x = tf.transpose(x, [0, 2, 1])
                x = tf.reshape(x, [-1, inp_l, inp_d])
                print("x: {0}".format(x))
                x = multihead_attention(
                    queries=final_attention_query,
                    keys=x,
                    pos_embedding=pos_embedding,
                    field_embedding=field_embedding,
                    num_units=inp_d,
                    num_heads=8,
                    dropout_rate=0,
                    is_training=is_training,
                    causality=False,
                    positional_encoding=self.positional_encoding)
                print("x: {0}".format(x))
                x = tf.reshape(x, [-1, 1, inp_d])
                x = tf.reduce_sum(x, axis=1)
                print("x: {0}".format(x))
        if is_training:
            x = tf.nn.dropout(x, self.keep_prob)
        x = tf.layers.dense(x, units=class_num)
        return x