Example #1
File: decoder.py Project: kpsc/nlp
    def decoder(self,
                dec_inputs,
                pre_state,
                encoder_outputs,
                encoder_len,
                decode=False):
        '''
        inputs:
            dec_inputs: A list of 2D Tensors [batch_size, embedding_size]
            encoder_outputs: [batch_size, seq_len, hidden_size]
            pre_state: [batch_size, hidden_size]
        returns:
            outputs: a list of 2D Tensors, the same length as dec_inputs
            state: the final state of the decoder
        '''
        batch_size, _, hidden_size = encoder_outputs.get_shape().as_list()

        with tf.variable_scope('decoder'):
            # context_vector = tf.zeros([batch_size, hidden_size], dtype=tf.float32)
            # if decode:
            #     context_vector, _ = self.attention(pre_state, encoder_outputs, encoder_len)

            state = pre_state
            outputs = []
            for i, dec_input in enumerate(dec_inputs):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()

                context_vector, _ = self.attention(state, encoder_outputs,
                                                   encoder_len)

                dec_size = dec_input.get_shape()[-1]
                x = linear([dec_input] + [context_vector], dec_size,
                           True)  # merge input and context

                output, state = self.cell(x, state)

                with tf.variable_scope('decoder_output'):
                    output = linear([output] + [context_vector],
                                    self.cell.output_size, True)
                outputs.append(output)

                # once we have the output y_i, we can compute attention for the next time step
                # in decode mode we only decode one step per run
                # if not decode:
                #     context_vector, _ = self.attention(state, encoder_outputs, encoder_len)

        return outputs, state
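
For context, `linear` here is a project helper rather than a TensorFlow builtin. Below is a minimal sketch of a compatible helper, assuming the classic seq2seq-style signature `linear(args, output_size, bias)`; the scope name, `bias_start`, and other details are assumptions, and the actual kpsc/nlp implementation may differ.

import tensorflow as tf

def linear(args, output_size, bias=True, bias_start=0.0, scope='Linear'):
    # Accept a single [batch_size, dim] tensor or a list of such tensors.
    if not isinstance(args, (list, tuple)):
        args = [args]
    total_size = sum(a.get_shape().as_list()[-1] for a in args)
    with tf.variable_scope(scope):
        w = tf.get_variable('w', [total_size, output_size])
        out = tf.matmul(tf.concat(args, axis=1), w)  # concatenate, then one affine map
        if bias:
            b = tf.get_variable('b', [output_size],
                                initializer=tf.constant_initializer(bias_start))
            out = out + b
        return out
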
Example #2
    def discriminator(self,
                      inputs_logits,
                      num_blocks=3,
                      use_bias=False,
                      num_classes=1):
        '''
        The discriminator scores the distribution of time and event.
        If the time is consistent with the history times, give a high score;
        otherwise, give a low score.

        Implementation: CNN
        '''
        with tf.variable_scope('Discriminator'):
            inputs = tf.transpose(inputs_logits, [0, 2, 1])
            output = utils.conv1d('D.Input', 1, self.filter_output_dim,
                                  self.filter_size, inputs)
            output = self.res_block('D.1', output)
            output = self.res_block('D.2', output)
            output = self.res_block('D.3', output)
            output = self.res_block('D.4', output)
            output = self.res_block('D.5', output)
            output = tf.reshape(output,
                                [-1, self.num_steps * self.filter_output_dim])
            output = utils.linear('D.Output',
                                  self.num_steps * self.filter_output_dim, 1,
                                  output)
            return output
Example #3
File: decoder.py Project: kpsc/nlp
    def attention(self,
                  decoder_state,
                  encoder_outputs,
                  encoder_len,
                  reuse=None):
        hidden_size = encoder_outputs.get_shape().as_list()[-1]
        max_len = tf.shape(encoder_outputs)[1]
        with tf.variable_scope('Attention', reuse=reuse):
            encoder_outputs_ = tf.expand_dims(
                encoder_outputs,
                axis=2)  # [batch_size, seq_len, 1, hidden_size]
            w = tf.get_variable('w', [1, 1, hidden_size, hidden_size])
            encoder_features = tf.nn.conv2d(encoder_outputs_, w, [1, 1, 1, 1],
                                            'SAME')

            decoder_feature = linear(decoder_state, hidden_size,
                                     True)  # [batch_size, hidden_size]
            decoder_feature = tf.expand_dims(
                tf.expand_dims(decoder_feature, 1), 1)

            v = tf.get_variable('v', [hidden_size])
            e = tf.reduce_sum(v * tf.tanh(encoder_features + decoder_feature),
                              [2, 3])  # [batch_size, seq_len]
            mask = tf.sequence_mask(encoder_len,
                                    maxlen=max_len,
                                    dtype=tf.float32)
            att = e * mask + (1 - mask) * (-1e6)
            att = tf.nn.softmax(att, axis=-1)

            context_vector = tf.matmul(
                tf.transpose(encoder_outputs, [0, 2, 1]),
                tf.expand_dims(att, 2))
            context_vector = tf.reshape(context_vector, [-1, hidden_size])

            return context_vector, att
Example #4
 def _decoder_step(time, prev_output, prev_rnn_states, outputs_array):
     new_output, new_rnn_states = seq_cell(prev_output, prev_rnn_states)
     new_output = utils.linear(new_output, 3)
     new_output = tf.where(tf.greater(lens_tensor, time), new_output,
                           tf.zeros_like(new_output))
     outputs_array = outputs_array.write(time, new_output)
     return time + 1, new_output, new_rnn_states, outputs_array
Example #5
    def add_dense_layers(self, architecture, input_layer, parameters,
                         name_aux):
        """
        Creates an MLP
        
        params:
            architecture: list on ints (hidden layers of the MLP)
            parameters: dictionary with weights
        """
        #TODO delete config parameter
        last_layer = input_layer
        #print(last_layer, "as input")
        #        prefix = prefix + "_" if prefix != '' else prefix
        #
        #        parameters = getattr(self, prefix + 'w')
        histograms = []
        for i, neurons in enumerate(architecture):
            number = 'l' + str(i + 1)
            layer_name = name_aux + number
            layer, weights, biases = \
                utils.linear(input_ = last_layer,
                       output_size = neurons,
                       activation_fn = tf.nn.relu,
                       name = layer_name)
            #            histograms += [tf.summary.histogram("w_" + layer_name, weights),
            #                           tf.summary.histogram("b_" + layer_name, biases)]
            #                           tf.summary.histogram("o_" + layer_name, layer)]
            #setattr(self, layer_name, layer)
            parameters[layer_name + "_w"] = weights
            parameters[layer_name + "_b"] = biases
            last_layer = layer
#            print(layer_name, layer.get_shape().as_list(), 'added')
#print(layer, 'added', layer_name)
        return last_layer, histograms
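
A short usage sketch: the architecture list, the `params` dict, and the input tensor below are made up for illustration, but the call pattern mirrors how `add_dense_layers` is invoked in Example #12.

params = {}
hidden, histograms = self.add_dense_layers(architecture=[64, 32],
                                           input_layer=self.s_t_flat,
                                           parameters=params,
                                           name_aux='hid_')
# params now holds 'hid_l1_w', 'hid_l1_b', 'hid_l2_w', 'hid_l2_b'
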
Example #6
 def inference(self, x):
     with tf.variable_scope("conv0"):
         conv1 = utils.relu(utils.Bn(utils.conv2d(x, 64, 7, 7, 2, 2, bias=True), training=self.is_training))
     with tf.name_scope("pool1"):
         pool1 = utils.max_pool(conv1, 3, 3, 2, 2)
     with tf.variable_scope("group0"):
         res2a = self.residual(pool1, 256, name='block0')
         res2b = self.residual(res2a, 256, name='block1')
         res2c = self.residual(res2b, 256, name='block2')
     with tf.variable_scope("group1"):
         res3a = self.residual(res2c, 512, 2, name='block0')
         res3b = self.residual(res3a, 512, name='block1')
         res3c = self.residual(res3b, 512, name='block2')
         res3d = self.residual(res3c, 512, name='block3')
     with tf.variable_scope("group2"):
         res4a = self.residual(res3d, 1024, 2, name='block0')
         res4b = self.residual(res4a, 1024, name='block1')
         res4c = self.residual(res4b, 1024, name='block2')
         res4d = self.residual(res4c, 1024, name='block3')
         res4e = self.residual(res4d, 1024, name='block4')
         res4f = self.residual(res4e, 1024, name='block5')
     with tf.variable_scope("group3"):
         res5a = self.residual(res4f, 2048, 2, name='block0')
         res5b = self.residual(res5a, 2048, name='block1')
         res5c = self.residual(res5b, 2048, name='block2')
     with tf.name_scope("pool5"):
         pool5 = utils.global_pool(res5c)
     with tf.variable_scope("linear"):
         dropout = tf.nn.dropout(pool5, keep_prob=self.keep_prob)
         out = utils.linear(dropout, 1000)
     return out
Example #7
 def __init__(self, deep_columns_idx, embedding_columns_dict, hidden_layers,
              dropouts, output_dim):
     """
     init parameters
     :param deep_columns_idx: dict include column name and it's index
         e.g. {'age': 0, 'career': 1,...}
     :param embedding_columns_dict: dict include categories columns name and number of unique val and embedding dimension
         e.g. {'age':(10, 32),...}
     :param hidden_layers: number of hidden layers
     :param deep_columns_idx: dict of columns name and columns index
     :param dropouts: list of float each hidden layers dropout len(dropouts) == hidden_layers - 1
     """
     super(DeepModel, self).__init__()
     self.embedding_columns_dict = embedding_columns_dict
     self.deep_columns_idx = deep_columns_idx
     for key, val in embedding_columns_dict.items():
         setattr(self, 'dense_col_' + key, nn.Embedding(val[0], val[1]))
     embedding_layer = 0
     for col in self.deep_columns_idx.keys():
         if col in embedding_columns_dict:
             embedding_layer += embedding_columns_dict[col][1]
         else:
             embedding_layer += 1
     self.layers = nn.Sequential()
     hidden_layers = [embedding_layer] + hidden_layers
     dropouts = [0.0] + dropouts
     for i in range(1, len(hidden_layers)):
         self.layers.add_module(
             'hidden_layer_{}'.format(i - 1),
             linear(hidden_layers[i - 1], hidden_layers[i],
                    dropouts[i - 1]))
     self.layers.add_module('last_linear',
                            nn.Linear(hidden_layers[-1], output_dim))
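
The `linear` factory used by Examples #7 and #13 is not shown. Judging from the call signature `linear(in_dim, out_dim, dropout)`, one plausible sketch is the block below; the ReLU is a guess, not taken from the source project.

import torch.nn as nn

def linear(in_dim, out_dim, dropout=0.0):
    # One fully connected block: affine map, nonlinearity, then dropout.
    return nn.Sequential(nn.Linear(in_dim, out_dim),
                         nn.ReLU(inplace=True),
                         nn.Dropout(p=dropout))
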
Example #8
 def _discriminator_conv(self, states):
     '''Convolve output of bidirectional RNN and predict the discriminator label.'''
     with tf.variable_scope("Discriminator"):
         W_conv = tf.get_variable(
             'W_conv', [
                 cfg.d_conv_window, 1,
                 states.get_shape()[2], cfg.hidden_size // cfg.d_conv_window
             ],
             initializer=tf.contrib.layers.xavier_initializer_conv2d())
         b_conv = tf.get_variable('b_conv',
                                  [cfg.hidden_size // cfg.d_conv_window],
                                  initializer=tf.constant_initializer(0.0))
         states = tf.expand_dims(states, 2)
         conv = tf.nn.conv2d(states,
                             W_conv,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
         conv_out = tf.reshape(
             conv,
             [2 * cfg.batch_size, -1, cfg.hidden_size // cfg.d_conv_window])
         conv_out = tf.nn.bias_add(conv_out, b_conv)
         reduced = tf.nn.elu(tf.reduce_sum(conv_out, [1])) * 1e-1
         output = utils.linear(reduced,
                               1,
                               True,
                               0.0,
                               scope='discriminator_output')
     return output
Example #9
    def generator(self, z):
        """
        :param z: the noise vector
        :return:
        """
        with tf.variable_scope("generator") as scope:
            self.z_, self.h0_w, self.h0_b = linear(z, self.gf_dim * 8 * 4 * 4, 'g_h0_lin', with_w=True)
            hs = [None]
            hs[0] = tf.reshape(self.z_, [-1, 4, 4, self.gf_dim * 8])
            hs[0] = tf.nn.relu(self.g_bns[0](hs[0], self.is_training))

            i = 1
            depth_mul = 8
            size = 8

            while size < self.image_size:
                hs.append(None)
                name = 'g_h{}'.format(i)
                hs[i], _, _ = conv2d_transpose(hs[i - 1], [self.batch_size, size, size, self.gf_dim * depth_mul],
                                               name=name, with_w=True)
                hs[i] = tf.nn.relu(self.g_bns[i](hs[i], self.is_training))
                i += 1
                depth_mul //= 2
                size *= 2

            hs.append(None)
            name = 'g_h{}'.format(i)
            hs[i], _, _ = conv2d_transpose(hs[i - 1], [self.batch_size, size, size, 3], name=name, with_w=True)

            return tf.nn.tanh(hs[i])
Example #10
 def discriminator(self,
                   inputs_logits,
                   num_blocks=3,
                   use_bias=False,
                   num_classes=1):
     """
     The discriminator to score the distribution of time and event
     If the time is consistent with the history times, give high score.
     If it is on the constant, give low score.
     Implementation:
     CNN"""
     with tf.variable_scope('Discriminator'):
         # inputs = tf.transpose(inputs_logits, [0,2,1])
         inputs = inputs_logits
         output = utils.conv1d('D.Input', 1, self.filter_output_dim,
                               self.filter_size, inputs)
         output = self.res_block('D.1', output)
         output = self.res_block('D.2', output)
         output = self.res_block('D.3', output)
         output = self.res_block('D.4', output)
         output = self.res_block('D.5', output)
         output = tf.reshape(
             output,
             [-1, (self.length + self.num_steps) * self.filter_output_dim])
         # if the output size is 1, it is the discriminator score of D
         # if the output size is 2, it is a bi-classification result of D
         output = tf.nn.sigmoid(
             utils.linear('D.Output', (self.length + self.num_steps) *
                          self.filter_output_dim, 1, output))
         logging.info('The shape of output from D {}'.format(
             output.get_shape()))
         return output
Example #11
def build_model(xs, ys, n_neurons, n_layers, activation_fn,
                final_activation_fn, cost_type):

    xs = np.asarray(xs)
    ys = np.asarray(ys)

    if xs.ndim != 2:
        raise ValueError('xs should be a 2-dimensional array of shape ' +
                         'n_observations x n_features.')
    if ys.ndim != 2:
        raise ValueError('ys should be a 2-dimensional array of shape ' +
                         'n_observations x n_features.')

    n_xs = xs.shape[1]
    n_ys = ys.shape[1]

    X = tf.compat.v1.placeholder(name='X',
                                 shape=[None, n_xs],
                                 dtype=tf.float32)
    Y = tf.compat.v1.placeholder(name='Y',
                                 shape=[None, n_ys],
                                 dtype=tf.float32)

    current_input = X
    for layer_i in range(n_layers):
        current_input = utils.linear(current_input,
                                     n_neurons,
                                     activation=activation_fn,
                                     name='layer{}'.format(layer_i))[0]

    Y_pred = utils.linear(current_input,
                          n_ys,
                          activation=final_activation_fn,
                          name='pred')[0]

    if cost_type == 'l1_norm':
        cost = tf.reduce_mean(
            input_tensor=tf.reduce_sum(input_tensor=tf.abs(Y -
                                                           Y_pred), axis=1))
    elif cost_type == 'l2_norm':
        cost = tf.reduce_mean(input_tensor=tf.reduce_sum(
            input_tensor=tf.math.squared_difference(Y, Y_pred), axis=1))
    else:
        raise ValueError('Unknown cost_type: {}.  '.format(cost_type) +
                         'Use only "l1_norm" or "l2_norm"')

    return {'X': X, 'Y': Y, 'Y_pred': Y_pred, 'cost': cost}
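
A hedged usage sketch with toy data: `build_model` only constructs the graph, so training happens in a standard TF1-style session loop. The data, hyperparameters, and the choice of `tf.identity` as the final activation are illustrative assumptions about how the project's `utils.linear` accepts its activation callable.

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

xs = np.random.rand(100, 3).astype(np.float32)
ys = np.random.rand(100, 1).astype(np.float32)
model = build_model(xs, ys, n_neurons=16, n_layers=2,
                    activation_fn=tf.nn.relu,
                    final_activation_fn=tf.identity,
                    cost_type='l2_norm')

train_op = tf.compat.v1.train.AdamOptimizer(0.01).minimize(model['cost'])
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for _ in range(200):
        sess.run(train_op, feed_dict={model['X']: xs, model['Y']: ys})
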
Example #12
    def add_dueling(self, prefix, input_layer):
        """
        Extends module with the Dueling architecture
        """
        if prefix in ['', 'target']:
            #DQN
            architecture = self.ag.architecture_duel
            output_length = self.environment.action_size
        else:
            #HDQN
            if prefix in ['mc', 'mc_target']:
                architecture = self.mc_ag.architecture_duel
                output_length = self.goal_size
            elif prefix in ['c', 'c_target']:
                architecture = self.c_ag.architecture_duel
                output_length = self.environment.action_size
            else:
                assert 0

        parameters = self.get(prefix, 'w')
        #        prefix = prefix.replace("target_", "")
        last_layer = input_layer

        #print("adding dense into ", prefix+'w')
        value_hid, histograms_v = self.add_dense_layers(
            architecture=architecture,
            input_layer=last_layer,
            parameters=parameters,
            name_aux='value_hid_')
        adv_hid, histograms_a = self.add_dense_layers(
            architecture=architecture,
            input_layer=last_layer,
            parameters=parameters,
            name_aux='adv_hid_')
        aux1 = 'value_out'
        aux2 = 'adv_out'

        value, w_val, b_val = utils.linear(value_hid, 1, name=aux1)
        adv, w_adv, b_adv = utils.linear(adv_hid, output_length, name=aux2)
        parameters[aux1 + "_w"] = w_val
        parameters[aux1 + "_b"] = b_val
        parameters[aux2 + "_w"] = w_adv
        parameters[aux2 + "_b"] = b_adv
        q = value + (adv -
                     tf.reduce_mean(adv, reduction_indices=1, keepdims=True))
        #print(q)
        return q
Example #13
 def __init__(self, input_dim, output_dim, dropout=0):
     """
     wide model using LR
     :param input_dim: int the dimension of wide model input
     :param output_dim: int the dimension of wide model output
     """
     super(WideModel, self).__init__()
     self.linear = linear(input_dim, output_dim, dropout)
Example #14
 def discriminator_energy(self, states):  # FIXME
     '''An energy-based discriminator that tries to reconstruct the input states.'''
     with tf.variable_scope("Discriminator"):
         _, state = tf.nn.dynamic_rnn(self.rnn_cell(cfg.d_num_layers,
                                                    cfg.hidden_size),
                                      states,
                                      swap_memory=True,
                                      dtype=tf.float32,
                                      scope='discriminator_encoder')
         # XXX use BiRNN+convnet for the encoder
         # this latent is of size cfg.hidden_size since it needs a lot more capacity than
         # cfg.latent_size to reproduce the hidden states
         # TODO use all states instead of just the final state
         latent = utils.highway(state, layer_size=1)
         latent = utils.linear(latent,
                               cfg.hidden_size,
                               True,
                               scope='discriminator_latent_transform')
         # TODO make initial state from latent, don't just use zeros
         decoder_input = tf.concat(
             1,
             [tf.zeros([2 * cfg.batch_size, 1, cfg.hidden_size]), states])
         output, _ = tf.nn.dynamic_rnn(self.rnn_cell(
             cfg.d_num_layers, cfg.hidden_size, latent),
                                       decoder_input,
                                       swap_memory=True,
                                       dtype=tf.float32,
                                       scope='discriminator_decoder')
         output = tf.reshape(output, [-1, cfg.hidden_size])
         reconstructed = utils.linear(output,
                                      cfg.hidden_size,
                                      True,
                                      0.0,
                                      scope='discriminator_reconst')
         reconstructed = tf.reshape(
             reconstructed, [2 * cfg.batch_size, -1, cfg.hidden_size])
         # don't train this projection, since the model can learn to zero out ret_latent to
         # minimize the reconstruction error
         ret_latent = tf.nn.tanh(
             utils.linear(self.latent,
                          cfg.hidden_size,
                          False,
                          scope='discriminator_ret_latent',
                          train=False))
     return reconstructed, ret_latent
Example #15
    def __call__(self, inputs, state, scope=None):
        with vs.variable_scope("gates"):
            value = tf.nn.sigmoid(
                linear([state, inputs],
                       2 * self._num_units,
                       True,
                       normalize=self._normalize))
            i, f = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        with vs.variable_scope("candidate"):
            c = linear([inputs],
                       self._num_units,
                       True,
                       normalize=self._normalize)

        new_c = i * c + f * state
        new_h = self._activation(c)

        return new_h, new_c
Example #16
    def __call__(self, inputs, state, scope=None):
        with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
            h, n, d = state

            with vs.variable_scope("u"):
                u = linear(inputs,
                           self._num_units,
                           True,
                           normalize=self._normalize)

            with vs.variable_scope("g"):
                g = linear([inputs, h],
                           self._num_units,
                           True,
                           normalize=self._normalize)

            with vs.variable_scope(
                    "a"
            ):  # The bias term when factored out of the numerator and denominator cancels and is unnecessary
                a = tf.exp(
                    linear([inputs, h],
                           self._num_units,
                           True,
                           normalize=self._normalize))

            with vs.variable_scope("discount_factor"):
                discount_factor = tf.nn.sigmoid(
                    linear([inputs, h],
                           self._num_units,
                           True,
                           normalize=self._normalize))

            z = tf.multiply(u, tanh(g))

            n = tf.multiply(n, discount_factor) + tf.multiply(
                z, a)  # Numerically stable update of numerator
            d = tf.multiply(d, discount_factor
                            ) + a  # Numerically stable update of denominator
            h_new = self._activation(tf.div(n, d))

            new_state = RDACellTuple(h_new, n, d)

        return h_new, new_state
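
For reference, my reading of the update above (a summary, not taken from the source project): the cell maintains the numerator and denominator of a discounted, attention-weighted running average,

n_t = \gamma_t \odot n_{t-1} + z_t \odot a_t, \qquad d_t = \gamma_t \odot d_{t-1} + a_t, \qquad h_t = f\!\left(n_t / d_t\right)

where $z_t = u_t \odot \tanh(g_t)$, $a_t$ is the unnormalized (exponential) attention weight, $\gamma_t$ the learned discount factor, and $f$ the cell activation.
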
Example #17
 def discriminator(self, img, cond, reuse):
     dim = len(img.get_shape())
     with tf.variable_scope("disc", reuse=reuse):
         image = tf.concat([img, cond], dim - 1)
         feature = conf.conv_channel_base
         h0 = lrelu(conv2d(image, feature, name="h0"))
         h1 = lrelu(batch_norm(conv2d(h0, feature * 2, name="h1"), "h1"))
         h2 = lrelu(batch_norm(conv2d(h1, feature * 4, name="h2"), "h2"))
         h3 = lrelu(batch_norm(conv2d(h2, feature * 8, name="h3"), "h3"))
         h4 = linear(tf.reshape(h3, [1, -1]), 1, "linear")
     return h4
Example #18
    def __atm_chisq__(self, pars, apertures):
        """Computes combined chi (not squared) for every aperture"""

        k, *c = pars
        diff = np.array([])
        for i, data in enumerate(apertures):
            # compute chi residuals for each aperture
            mag, mag_err = utils.flux_to_mag(data[:,2], data[:,3])
            airmass = data[:,0]
            diff_new = np.abs((mag - utils.linear(airmass, k, c[i])) / mag_err)
            diff = np.hstack((diff, diff_new))
        return diff
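
Unlike the neural-network helpers elsewhere in this listing, `utils.linear` here looks like a plain straight-line function of airmass (an assumption inferred from the call `utils.linear(airmass, k, c[i])`), roughly:

def linear(x, k, c):
    # Straight-line model, e.g. magnitude vs. airmass in an extinction fit.
    return k * x + c
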
Example #19
    def g_time(self, hidden_r):
        '''
        The generator model for time and event.
        '''
        with tf.variable_scope('Generator_T'):
            outputs = utils.build_rnn_graph(hidden_r, self.num_layers,
                                            self.hidden_size, self.batch_size,
                                            self.length, "G_T.RNN")
            output = tf.reshape(tf.concat(outputs, 1), [-1, self.g_size])
            output = utils.linear('G_T.Output', self.g_size, 1, output)
            logits = tf.reshape(output, [self.batch_size, self.length, 1])
            return logits
Example #20
    def discriminator(self, image, reuse=False):
        with tf.variable_scope('discriminator') as scope:
            if reuse:
                scope.reuse_variables()

            h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
            h1 = lrelu(self.d_bns[0](conv2d(h0, self.df_dim * 2, name='d_h1_conv'), self.is_training))
            h2 = lrelu(self.d_bns[1](conv2d(h1, self.df_dim * 4, name='d_h2_conv'), self.is_training))
            h3 = lrelu(self.d_bns[2](conv2d(h2, self.df_dim * 8, name='d_h3_conv'), self.is_training))
            h4 = linear(tf.reshape(h3, [-1, 8192]), 1, 'd_h4_lin')

            return tf.nn.sigmoid(h4), h4
Example #21
	def build_cnn_model(self):
		
		self.imgs = tf.placeholder('float32', [self.batch_size, self.input_dims])
		self.img_reshape = tf.reshape(self.imgs, [self.batch_size, self.w, self.h, self.channel])	
		if self.synthetic:
			self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'], self.synthetic_grad['l1'] = conv2d(self.img_reshape, 128, [5,5], [1,1],
									self.weight_initializer, self.bias_initializer, synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l1_con2d')		
			self.layer_out['l1_pool'] = pooling(self.layer_out['l1'], kernel_size=[3,3], stride=[1,1], type='max')

			self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'], self.synthetic_grad['l2'] = conv2d(self.layer_out['l1_pool'], 128, [5,5], [1,1],
									self.weight_initializer, self.bias_initializer, synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l2_con2d')
			self.layer_out['l2_pool'] = pooling(self.layer_out['l2'], kernel_size=[3,3], stride=[1,1], type='average')

			self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'], self.synthetic_grad['l3'] = conv2d(self.layer_out['l2_pool'], 128, [5,5], [1,1],
									self.weight_initializer, self.bias_initializer, synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l3_con2d')
			self.layer_out['l3_pool'] = pooling(self.layer_out['l3'], kernel_size=[3,3], stride=[1,1], type='average')
			self.layer_out['l3_reshape'] = tf.reshape(self.layer_out['l3_pool'], [self.batch_size, -1])

			self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'], self.synthetic_grad['l4'] = linear(self.layer_out['l3_reshape'], self.output_size,
									self.weight_initializer, self.bias_initializer, synthetic=True, activation_fn=tf.nn.relu, name='l4_linear')
		else:
			self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'] = conv2d(self.img_reshape, 128, [5,5], [1,1],
									self.weight_initializer, self.bias_initializer, batch_norm=True, activation_fn=tf.nn.relu, name='l1_con2d')		
			self.layer_out['l1_pool'] = pooling(self.layer_out['l1'], kernel_size=[3,3], stride=[1,1], type='max')

			self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'] = conv2d(self.layer_out['l1_pool'], 128, [5,5], [1,1],
									self.weight_initializer, self.bias_initializer, batch_norm=True, activation_fn=tf.nn.relu, name='l2_con2d')
			self.layer_out['l2_pool'] = pooling(self.layer_out['l2'], kernel_size=[3,3], stride=[1,1], type='average')

			self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'] = conv2d(self.layer_out['l2_pool'], 128, [5,5], [1,1],
									self.weight_initializer, self.bias_initializer, batch_norm=True, activation_fn=tf.nn.relu, name='l3_con2d')
			self.layer_out['l3_pool'] = pooling(self.layer_out['l3'], kernel_size=[3,3], stride=[1,1], type='average')
			self.layer_out['l3_reshape'] = tf.reshape(self.layer_out['l3_pool'], [self.batch_size, -1])

			self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'] = linear(self.layer_out['l3_reshape'], self.output_size,
									self.weight_initializer, self.bias_initializer, activation_fn=tf.nn.relu, name='l4_linear')

		self.out_logit = tf.nn.softmax(self.layer_out['l4'])
		self.out_argmax = tf.argmax(self.out_logit, 1)
		self.labels = tf.placeholder('int32', [self.batch_size])
		self.loss_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(self.layer_out['l4'], self.labels)
		self.loss = tf.reduce_sum(self.loss_entropy)/self.batch_size

		if self.synthetic:
			self.grad_output['l1'] = tf.gradients(self.loss, self.layer_out['l1'])
			self.grad_output['l2'] = tf.gradients(self.loss, self.layer_out['l2'])
			self.grad_output['l3'] = tf.gradients(self.loss, self.layer_out['l3'])
			self.grad_output['l4'] = tf.gradients(self.loss, self.layer_out['l4'])
	
			for k in self.grad_output.keys():
				self.grad_loss.append(tf.reduce_sum(tf.square(self.synthetic_grad[k]-self.grad_output[k])))
			self.grad_total_loss = sum(self.grad_loss)
Example #22
    def __init__(self, x_bxu, z_size, name, var_min=0.0):
        """Create an input dependent diagonal Gaussian distribution.

        Args:
          x: The input tensor from which the mean and variance are computed,
            via a linear transformation of x.  I.e.
              mu = Wx + b, log(var) = Mx + c
          z_size: The size of the distribution.
          name:  The name to prefix to learned variables.
          var_min (optional): Minimal variance allowed.  This is an additional
            way to control the amount of information getting through the stochastic
            layer.
        """
        size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size])  # [batch, size]
        self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name + "/mean"))
        logvar_bxn = linear(x_bxu, z_size, name=(name + "/logvar"))
        if var_min > 0.0:
            logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min)
        self.logvar_bxn = logvar_bxn
        self.noise_bxn = noise_bxn = tf.random_normal(size_bxn)  # [batch, size]
        self.noise_bxn.set_shape([None, z_size])  # [batch, size]
        self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn  # [batch, size]
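
A hedged follow-up sketch (the variable names are mine, and `dist` stands for an instance of the class above): downstream code would typically penalize this posterior with its KL divergence from the N(0, I) prior; the closed form below matches the `klds` expression used in Example #28.

kl_b = -0.5 * tf.reduce_sum(
    1.0 + dist.logvar_bxn - tf.exp(dist.logvar_bxn) - tf.square(dist.mean_bxn),
    axis=1)                   # KL(q(z|x) || N(0, I)) for each batch element
kl = tf.reduce_mean(kl_b)     # scalar, averaged over the batch
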
Example #23
	def build_mlp_model(self):
		
		self.imgs = tf.placeholder('float32',[self.batch_size, self.input_dims])

		# quite annoyed
		if self.synthetic:
			self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'], self.synthetic_grad['l1'] = linear(self.imgs, self.hidden_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l1_linear')
			self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'], self.synthetic_grad['l2'] = linear(self.layer_out['l1'], self.hidden_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l2_linear')
			self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'], self.synthetic_grad['l3'] = linear(self.layer_out['l2'], self.hidden_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l3_linear')
			self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'], self.synthetic_grad['l4'] = linear(self.layer_out['l3'], self.output_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l4_linear')
		else:
			self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'] = linear(self.imgs, self.hidden_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l1_linear')
			self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'] = linear(self.layer_out['l1'], self.hidden_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l2_linear')
			self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'] = linear(self.layer_out['l2'], self.hidden_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l3_linear')
			self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'] = linear(self.layer_out['l3'], self.output_size,
							self.weight_initializer, self.bias_initializer, synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l4_linear')

		self.out_logit = tf.nn.softmax(self.layer_out['l4'])
		self.out_argmax = tf.argmax(self.out_logit, 1)
		self.labels = tf.placeholder('int32', [self.batch_size])
		self.loss_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(self.layer_out['l4'], self.labels)
		self.loss = tf.reduce_mean(self.loss_entropy)

		if self.synthetic:
			self.grad_output['l1'] = tf.gradients(self.loss, self.layer_out['l1'])
			self.grad_output['l2'] = tf.gradients(self.loss, self.layer_out['l2'])
			self.grad_output['l3'] = tf.gradients(self.loss, self.layer_out['l3'])
			self.grad_output['l4'] = tf.gradients(self.loss, self.layer_out['l4'])
	
			for k in self.grad_output.keys():
				self.grad_loss.append(tf.reduce_sum(tf.square(self.synthetic_grad[k]-self.grad_output[k])))
			self.grad_total_loss = sum(self.grad_loss)
Example #24
 def __call__(self, inputs, state, scope=None):
     """Gated recurrent unit (GRU) with nunits cells."""
     with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
         if self.pretanh:
             state = state[:, :self.num_units]
         with tf.variable_scope("Gates"):  # Reset gate and update gate.
             # We start with bias of 1.0 to not reset and not update.
             r, u = tf.split(
                 1, 2,
                 utils.linear([inputs, state], 2 * self.num_units, True,
                              1.0))
             r, u = tf.nn.sigmoid(r), tf.nn.sigmoid(u)
         with tf.variable_scope("Candidate"):
             preact = utils.linear([inputs, r * state], self.num_units,
                                   True)
             c = self.activation(preact)
         new_h = u * state + (1 - u) * c
     if self.pretanh:
         new_state = tf.concat(1, [new_h, preact])
     else:
         new_state = new_h
     return new_h, new_state
Example #25
    def discriminator(self, image, caption, reuse=False):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        h0 = utils.lrelu(
            utils.conv2d(image, self.channel_dim, name='d_h0_conv'))
        h1 = utils.lrelu(
            self.d_bn1(utils.conv2d(h0, self.channel_dim * 2,
                                    name='d_h1_conv')))
        h2 = utils.lrelu(
            self.d_bn2(utils.conv2d(h1, self.channel_dim * 4,
                                    name='d_h2_conv')))
        h3 = utils.lrelu(
            self.d_bn3(utils.conv2d(h2, self.channel_dim * 8,
                                    name='d_h3_conv')))

        reduced_caption = utils.lrelu(
            utils.linear(caption, self.reduced_text_dim, 'd_embedding'))
        reduced_caption = tf.expand_dims(reduced_caption, 1)
        reduced_caption = tf.expand_dims(reduced_caption, 2)
        tiled_caption = tf.tile(reduced_caption, [1, 4, 4, 1],
                                name='tiled_embedding')

        h3_concat = tf.concat([h3, tiled_caption], 3, name='h3_concat')
        h3_new = utils.lrelu(
            self.d_bn4(
                utils.conv2d(h3_concat,
                             self.channel_dim * 8,
                             1,
                             1,
                             1,
                             1,
                             name='d_h3_conv_new')))

        h4 = utils.linear(tf.reshape(h3_new, [self.batch_size, -1]), 1,
                          'd_h3_lin')

        return tf.nn.sigmoid(h4), h4
Example #26
  def __init__(self, x_bxu, z_size, name, var_min=0.0):
    """Create an input dependent diagonal Gaussian distribution.

    Args:
      x: The input tensor from which the mean and variance are computed,
        via a linear transformation of x.  I.e.
          mu = Wx + b, log(var) = Mx + c
      z_size: The size of the distribution.
      name:  The name to prefix to learned variables.
      var_min (optional): Minimal variance allowed.  This is an additional
        way to control the amount of information getting through the stochastic
        layer.
    """
    size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size])
    self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name+"/mean"))
    logvar_bxn = linear(x_bxu, z_size, name=(name+"/logvar"))
    if var_min > 0.0:
      logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min)
    self.logvar_bxn = logvar_bxn

    self.noise_bxn = noise_bxn = tf.random_normal(size_bxn)
    self.noise_bxn.set_shape([None, z_size])
    self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn
Example #27
    def build_controller(self):
        self.c_w = {}
        self.c_target_w = {}
    
        with tf.variable_scope('c_prediction'):
            #input_size = self.environment.state_size + self.goal_size
            
            self.c_s_t = tf.placeholder("float",
                                [None, 1,
                                self.environment.state_size],
                                name = 'c_s_t')
            shape = self.c_s_t.get_shape().as_list()
            self.c_s_t_flat = tf.reshape(self.c_s_t, [-1, reduce(
                    lambda x, y: x * y, shape[1:])])
            self.c_g_t = tf.placeholder("float",
                               [None, self.goal_size],
                               name = 'c_g_t')
            self.c_gs_t = tf.concat([self.c_g_t, self.c_s_t_flat],
                           axis = 1,
                           name = 'c_gs_concat')
            last_layer = self.c_gs_t
            last_layer, histograms = self.add_dense_layers(
                                            architecture = self.c_ag.architecture,
                                               input_layer = last_layer,
                                               parameters = self.c_w,
                                               name_aux= '')
            if self.c_ag.dueling:
                self.c_q = self.add_dueling(prefix = 'c',
                                            input_layer = last_layer)
            else:
                self.c_q, self.c_w['q_w'], self.c_w['q_b'] = \
                                      utils.linear(last_layer,
                                      self.environment.action_size,
                                      name='c_q')
            self.c_q_action= tf.argmax(self.c_q, axis=1)
            
            q_summary = histograms
            avg_q = tf.reduce_mean(self.c_q, 0)
            

            for idx in range(self.c_ag.q_output_length):
                q_summary.append(tf.summary.histogram('c_q/%s' % idx, avg_q[idx]))
            self.c_q_summary = tf.summary.merge(q_summary, 'c_q_summary')

        # target network
        self.create_target(prefix = 'c')
        
        
        #Controller optimizer
        self.build_optimizer(prefix = 'c')
Example #28
    def encode(self):

        encoded_output, encoded_state = utils.encode_seq(
            input_seq=self.q1,
            seq_len=self.len1,
            word_embeddings=self.word_embeddings,
            num_neurons=self.num_neurons)  # [batch_size, 2*num_neurons]

        with tf.variable_scope(
                "variational_inference"):  # Variational inference
            mean = utils.linear(encoded_state, self.hidden_size,
                                scope='mean')  # [batch_size, n_hidden]
            logsigm = utils.linear(encoded_state,
                                   self.hidden_size,
                                   scope='logsigm')  # [batch_size, n_hidden]
            self.mean, self.logsigm = mean, logsigm

            # Gaussian Multivariate kld(z,N(0,1)) = -0.5 * [ sum_d(logsigma) + d - sum_d(sigma) - mu_T*mu]
            klds = -0.5 * (tf.reduce_sum(logsigm, 1) +
                           tf.cast(tf.shape(mean)[1], tf.float32) -
                           tf.reduce_sum(tf.exp(logsigm), 1) -
                           tf.reduce_sum(tf.square(mean), 1)
                           )  # KLD(q(z|x), N(0,1))     tensor [batch_size]
            utils.variable_summaries(
                'klds', klds)  # posterior distribution close to prior N(0,1)
            self.kld = tf.reduce_mean(klds, 0)  # mean over batches: scalar

            h_ = tf.get_variable("GO", [1, self.hidden_size],
                                 initializer=self.initializer)
            h_ = tf.tile(h_, [self.batch_size, 1
                              ])  # trainable tensor: decoder init_state[1]

            eps = tf.random_normal((self.batch_size, self.hidden_size), 0, 1)
            self.doc_vec = tf.multiply(
                tf.exp(logsigm), eps
            ) + mean  # sample from latent intent space: decoder init_state[0]
            self.doc_vec = self.doc_vec, h_  # tuple state Z, h
Example #29
 def encode(self, x, weights=None):
     if weights is None:
         conv = self.encoder(x)
         mu, logvar = self.fc21(conv.view(-1, 512)), self.fc22(
             conv.view(-1, 512))
         z = self.reparameterize(mu, logvar)
     else:
         x = utils.conv2d(x, weights[0], weights[1], stride=2, padding=1)
         x = utils.batch_norm(x, weights[2], weights[3], momentum=1)
         x = F.relu(x)
         x = utils.conv2d(x, weights[4], weights[5], stride=2, padding=1)
         x = utils.batch_norm(x, weights[6], weights[7], momentum=1)
         x = F.relu(x)
         x = utils.conv2d(x, weights[8], weights[9], stride=2, padding=1)
         x = utils.batch_norm(x, weights[10], weights[11], momentum=1)
         x = F.relu(x)
         x = utils.conv2d(x, weights[12], weights[13], stride=1, padding=0)
         x = utils.batch_norm(x, weights[14], weights[15], momentum=1)
         x = F.relu(x)
         x = x.view(-1, 512)
         mu = utils.linear(x, weights[16], weights[17])
         logvar = utils.linear(x, weights[18], weights[19])
         z = self.reparameterize(mu, logvar)
     return z, mu, logvar
Example #30
    def __call__(self, inputs, state, scope=None):
        with _checked_scope(self, scope or "ran_cell", reuse=self._reuse):
            with vs.variable_scope("gates"):
                c, h = state
                gates = tf.nn.sigmoid(
                    linear([inputs, h],
                           2 * self._num_units,
                           True,
                           normalize=self._normalize))
                i, f = array_ops.split(value=gates,
                                       num_or_size_splits=2,
                                       axis=1)

            with vs.variable_scope("candidate"):
                content = linear([inputs],
                                 self._num_units,
                                 True,
                                 normalize=self._normalize)

            new_c = i * content + f * c
            new_h = self._activation(c)
            new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
            output = new_h
        return output, new_state
Example #31
 def g_time(self, hidden_r, name=''):
     """
     The generative model for time and event
     mode:
     1. use the concatenated hidden representation for each time step
     2. use the unfolded hidden representation separately for each time step
     """
     with tf.variable_scope('Generator/Time-g' + name):
         outputs = utils.build_rnn_graph_decoder1(hidden_r, self.num_layers,
                                                  self.hidden_size,
                                                  self.batch_size,
                                                  self.length, "G_T.RNN")
         output = tf.reshape(tf.concat(outputs, 1), [-1, self.g_size])
         output = utils.linear('G_T.Output', self.g_size, 1, output)
         logits = tf.reshape(output, [self.batch_size, self.length, 1])
         return logits
Example #32
    def build_dqn(self):
        self.w = {}

        with tf.variable_scope('step'):
            self.step_op = tf.Variable(0, trainable=False, name='step')
            self.step_input = tf.placeholder('int32', None, name='step_input')
            self.step_assign_op = self.step_op.assign(self.step_input)

        # training network
        with tf.variable_scope('prediction'):

            # tf Graph input
            self.s_t = tf.placeholder(
                "float",
                [None, self.ag.history_length, self.environment.state_size],
                name='s_t')

            shape = self.s_t.get_shape().as_list()
            self.s_t_flat = tf.reshape(
                self.s_t, [-1, reduce(lambda x, y: x * y, shape[1:])])

            last_layer = self.s_t_flat
            last_layer, histograms = self.add_dense_layers(
                architecture=self.ag.architecture,
                input_layer=last_layer,
                parameters=self.w,
                name_aux='')
            if self.ag.dueling:
                self.q = self.add_dueling(prefix='', input_layer=last_layer)
            else:
                self.q, self.w['q_w'], self.w['q_b'] = utils.linear(
                    last_layer, self.environment.action_size, name='q')
            self.avg_q = tf.reduce_max(self.q, axis=1)
            self.q_action = tf.argmax(self.q, axis=1)
        self.create_target(prefix='')

        # optimizer
        self.build_optimizer(prefix='')

        self.setup_summary(self.m.scalar_tags, self.m.histogram_tags)
        tf.global_variables_initializer().run()
        vars_ = list(self.w.values()) + [self.step_op]
        self._saver = tf.train.Saver(vars_, max_to_keep=30)

        self.load_model()
        self.update_target_q_network(prefix='')
Example #33
 def decoder(self, inputs, z):
     '''Use the latent representation and word inputs to predict next words.'''
     with tf.variable_scope("Decoder"):
         initial = []
         for i in range(cfg.num_layers):
             initial.append(
                 tf.nn.tanh(
                     utils.linear(z,
                                  cfg.hidden_size,
                                  True,
                                  0.0,
                                  scope='decoder_initial%d' % i)))
         self.decode_initial = tuple(initial)
         self.decode_cell = self.rnn_cell(cfg.num_layers)
         output, _ = tf.nn.dynamic_rnn(self.decode_cell,
                                       inputs,
                                       initial_state=self.decode_initial,
                                       sequence_length=self.lengths - 1,
                                       swap_memory=True,
                                       dtype=tf.float32)
     return output