def _build_residual_classifier(self, split_idx):
    """Build a residual-conv classifier head over one input split.

    Stacks `self.nb_layers` residual blocks (downsampling on odd layers,
    e.g. [60, 30] --> [30, 15] --> [15, 7] --> [7, 3]), mean-pools the conv
    feature maps, optionally runs a BiLSTM, and projects to `self.nb_class`
    logits. The result is appended to the `self.output` list.
    """
    output = self.input_splits[split_idx]
    for layer_idx in range(self.nb_layers):
        if layer_idx == 0:
            output = OptimizedResBlockDisc1(output, self.nb_emb, self.output_dim,
                                            resample=None)
        else:
            # Downsample every other layer; even layers keep the resolution.
            output = resblock('ResBlock%d' % (layer_idx), self.output_dim,
                              self.output_dim, self.filter_size, output,
                              'down' if layer_idx % 2 == 1 else None,
                              self.is_training_ph, use_bn=self.use_bn,
                              r=self.residual_connection)
    # Aggregate conv feature maps along axis 2.
    output = tf.reduce_mean(output, axis=[2])
    # More clever attention mechanism for weighting the contribution.
    if self.use_lstm:
        output = lib.ops.LSTM.bilstm('BILSTM', self.output_dim, output,
                                     tf.shape(output)[1])
    # BiLSTM doubles the feature dimension (forward + backward states).
    output = lib.ops.Linear.linear(
        'AMOutput',
        self.output_dim * 2 if self.use_lstm else self.output_dim,
        self.nb_class, output)
    # Accumulate one head per split.
    if hasattr(self, 'output'):
        self.output += [output]
    else:
        self.output = [output]
def _build_seq2seq(self, split_idx):
    """Build a conv-encoder / attention-decoder seq2seq head for one split.

    Runs the residual conv stack (no downsampling), mean-pools the feature
    maps, encodes with a BiLSTM, decodes with attention, and projects the
    decoder outputs to `self.nb_class` logits appended to `self.output`.

    NOTE(review): a 4-argument method with this same name is defined later
    in the file and shadows this definition — confirm which is intended.
    """
    output = self.input_splits[split_idx]
    for layer_idx in range(self.nb_layers):
        if layer_idx == 0:
            output = OptimizedResBlockDisc1(output, self.nb_emb, self.output_dim,
                                            resample=None)
        else:
            output = resblock('ResBlock%d' % (layer_idx), self.output_dim,
                              self.output_dim, self.filter_size, output, None,
                              self.is_training_ph, use_bn=self.use_bn,
                              r=self.residual_connection)
    # Aggregate conv feature maps along axis 2.
    output = tf.reduce_mean(output, axis=[2])
    # More clever attention mechanism for weighting the contribution.
    encoder_outputs, encoder_states = BiLSTMEncoder(
        'Encoder', self.output_dim, output, self.max_size[0])
    decoder_outputs, decoder_states = AttentionDecoder(
        'Decoder', encoder_outputs, encoder_states, 8)
    # Bidirectional encoder doubles the feature dimension.
    output = lib.ops.Linear.linear('MapToOutputEmb', self.output_dim * 2,
                                   self.nb_class, decoder_outputs)
    # Accumulate one head per split.
    if hasattr(self, 'output'):
        self.output += [output]
    else:
        self.output = [output]
def _build_beam_seq2seq(self, split_idx, mode):
    """
    A seq2seq that uses beam search at the inference stage. Only the sampled
    token is passed down to the next step at the inference stage.

    In 'training' mode the graph reads `self.input_splits[split_idx]` and
    appends the decoder/auxiliary outputs to `self.output`,
    `self.mnist_output` and `self.nb_digits_output`; in 'inference' mode it
    reads `self.inference_input_ph` and stores the beam-decoder tuple on
    `self.inference_output`.
    """
    if mode not in ('training', 'inference'):
        raise ValueError('unknown mode')
    output = (self.input_splits[split_idx] if mode == 'training'
              else self.inference_input_ph)

    # NOTE(review): reconstructed from collapsed source — the exact extent of
    # this variable scope (which heads fall inside it) should be confirmed,
    # since it determines the created variable names.
    with tf.variable_scope('pretrain_effect_zone'):
        for layer_idx in range(self.nb_layers):
            if layer_idx == 0:
                output = OptimizedResBlockDisc1(output, self.nb_emb,
                                                self.output_dim, resample=None)
            else:
                shape = output.get_shape().as_list()
                # Odd layers double the channels and apply `self.resample`;
                # even layers keep shape (no downsampling otherwise).
                output = resblock('ResBlock%d' % (layer_idx), shape[-1],
                                  shape[-1] * 2 if layer_idx % 2 == 1 else shape[-1],
                                  self.filter_size, output,
                                  self.resample if layer_idx % 2 == 1 else None,
                                  self.is_training_ph, use_bn=self.use_bn,
                                  r=self.residual_connection)
        output = tf.nn.relu(output)
        shape = output.get_shape().as_list()
        # Collapse the two middle dims into a single sequence axis
        # (assumes a 4-D conv output — TODO confirm layout).
        output = tf.reshape(tf.transpose(output, [0, 2, 1, 3]),
                            [-1, np.prod(shape[1:3]), shape[-1]])
        # Auxiliary classification head over the flattened features.
        mnist_output = lib.ops.Linear.linear(
            'mnist_output', np.prod(shape[1:]), self.nb_mnist_class,
            tf.reshape(output, [-1, np.prod(shape[1:])]))
        # Auxiliary loss on length.
        nb_digits_output = lib.ops.Linear.linear(
            'NBDigitsLinear', shape[-1], self.nb_length_class,
            tf.reduce_sum(output, axis=1))
        encoder_outputs, encoder_states = BiLSTMEncoder(
            'Encoder', shape[-1], output, np.prod(shape[1:3]))
        # Feature dim from BiLSTMEncoder is shape[-1] * 2.
        decoder_outputs, decoder_states = BeamAttDecoder(
            'Decoder', encoder_outputs, encoder_states, self.nb_max_digits,
            self.nb_class, mode=mode, beam_size=self.beam_size)
    # Translation output.
    output = decoder_outputs

    if mode == 'training':
        if hasattr(self, 'output'):
            self.output += [output]
            self.mnist_output += [mnist_output]
            self.nb_digits_output += [nb_digits_output]
        else:
            self.output = [output]
            self.mnist_output = [mnist_output]
            self.nb_digits_output = [nb_digits_output]
    else:
        # [0]: beam tokens, [1]: marginal logprob, [2]: attention_weights
        self.inference_output = output
def _build_seq2seq(self, split_idx, mode):
    """
    A fairly basic seq2seq without beam search that passes the attention
    vector to the next timestep at the decoding stage.

    In 'training' mode the graph reads `self.input_splits[split_idx]` and
    appends the projected decoder outputs plus the auxiliary heads to
    `self.output`, `self.mnist_output` and `self.nb_digits_output`; in
    'inference' mode it reads `self.inference_input_ph` and stores the
    logits on `self.inference_output` and the attention weights on
    `self.inference_att_weights`.

    NOTE(review): a 2-argument method with this same name is defined earlier
    in the file; this later definition shadows it — confirm which is intended.
    """
    if mode not in ('training', 'inference'):
        raise ValueError('unknown mode')
    output = (self.input_splits[split_idx] if mode == 'training'
              else self.inference_input_ph)

    # NOTE(review): reconstructed from collapsed source — the exact extent of
    # this variable scope (which heads fall inside it) should be confirmed,
    # since it determines the created variable names.
    with tf.variable_scope('pretrain_effect_zone'):
        for layer_idx in range(self.nb_layers):
            if layer_idx == 0:
                output = OptimizedResBlockDisc1(output, self.nb_emb,
                                                self.output_dim, resample=None)
            else:
                shape = output.get_shape().as_list()
                # Odd layers double the channels and apply `self.resample`;
                # even layers keep shape.
                output = resblock('ResBlock%d' % (layer_idx), shape[-1],
                                  shape[-1] * 2 if layer_idx % 2 == 1 else shape[-1],
                                  self.filter_size, output,
                                  self.resample if layer_idx % 2 == 1 else None,
                                  self.is_training_ph, use_bn=self.use_bn,
                                  r=self.residual_connection)
        output = tf.nn.relu(output)
        shape = output.get_shape().as_list()
        # Collapse the two middle dims into a single sequence axis
        # (assumes a 4-D conv output — TODO confirm layout).
        output = tf.reshape(tf.transpose(output, [0, 2, 1, 3]),
                            [-1, np.prod(shape[1:3]), shape[-1]])
        # Auxiliary classification head over the flattened features.
        mnist_output = lib.ops.Linear.linear(
            'mnist_output', np.prod(shape[1:]), self.nb_mnist_class,
            tf.reshape(output, [-1, np.prod(shape[1:])]))
        # Auxiliary loss on length.
        nb_digits_output = lib.ops.Linear.linear(
            'NBDigitsLinear', shape[-1], self.nb_length_class,
            tf.reduce_sum(output, axis=1))
        encoder_outputs, encoder_states = BiLSTMEncoder(
            'Encoder', shape[-1], output, np.prod(shape[1:3]))
        # Feature dim from BiLSTMEncoder is shape[-1] * 2.
        decoder_outputs, decoder_states, att_weights = AttentionDecoder(
            'Decoder', encoder_outputs, encoder_states, self.nb_max_digits,
        )
    # Translation output.
    output = lib.ops.Linear.linear('MapToOutputEmb', shape[-1] * 2,
                                   self.nb_class, decoder_outputs)

    if mode == 'training':
        if hasattr(self, 'output'):
            self.output += [output]
            self.mnist_output += [mnist_output]
            self.nb_digits_output += [nb_digits_output]
        else:
            self.output = [output]
            self.mnist_output = [mnist_output]
            self.nb_digits_output = [nb_digits_output]
    else:
        self.inference_output = output
        self.inference_att_weights = att_weights