Example #1
display_step = 1
examples_to_show = 10
n_input = 784
print(tf.__version__)
print(tf.__path__)

# tf Graph input (only pictures)
X = tf.placeholder("float", [None, n_input])

# Store each hidden layer's parameters in dictionaries
n_hidden_1 = 256  # number of neurons in the first encoding layer
n_hidden_2 = 128  # number of neurons in the second encoding layer
# The decoder's weights and biases mirror the encoder's in reverse order
# Each weight matrix is (layer input x layer output); each bias matches the number of output units
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'decoder_b2': tf.Variable(tf.random_normal([n_input])),
}


# Each layer computes xW + b
# Build the encoder
def encoder(x):
    # Two sigmoid(xW + b) layers, following the pattern described above
    layer_1 = tf.nn.sigmoid(
        tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1']))
    layer_2 = tf.nn.sigmoid(
        tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2']))
    return layer_2
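
A matching decoder, as a hedged sketch (not part of the original snippet), mirrors the encoder using the 'decoder_*' entries defined above:

# Build the decoder
def decoder(x):
    layer_1 = tf.nn.sigmoid(
        tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1']))
    layer_2 = tf.nn.sigmoid(
        tf.add(tf.matmul(layer_1, weights['decoder_h2']), biases['decoder_b2']))
    return layer_2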
Example #2
def matmul_train(x,
                 variational_params,
                 transpose_a=False,
                 transpose_b=False,
                 clip_alpha=None,
                 eps=common.EPSILON):
    R"""Training computation for a variation matmul.

  In variational dropout we train a Bayesian neural network where we assume a
  fully-factorized Gaussian posterior and log uniform prior over the weights.

  During training, we need to sample weights from this distribution. Rather
  than sample weights for each sample in the input batch, we can calculate the
  parameters of the distribution over the pre-activations analytically (this
  step is called the local reparameterization trick). This function calculates
  the mean and standard deviation of the distribution over the pre-activations,
  and then draws a single sample for each element in the input batch and passes
  them as output.

  Args:
    x: 2D Tensor representing the input batch.
    variational_params: 2-tuple of Tensors, where the first tensor is the \theta
      values and the second contains the log of the \sigma^2 values.
    transpose_a: If True, a is transposed before multiplication.
    transpose_b: If True, b is transposed before multiplication.
    clip_alpha: Int or None. If integer, we clip the log \alpha values to
      [-clip_alpha, clip_alpha]. If None, don't clip the values.
    eps: Small constant value to use in log and sqrt operations to avoid NaNs.

  Returns:
    Output Tensor of the matmul operation.

  Raises:
    RuntimeError: If the variational_params argument is not a 2-tuple.
  """
    # We expect a 2D input tensor, as is standard in fully-connected layers
    x.get_shape().assert_has_rank(2)

    theta, log_sigma2 = _verify_variational_params(variational_params)

    if clip_alpha is not None:
        # Compute the log_alphas and then compute the
        # log_sigma2 again so that we can clip on the
        # log alpha magnitudes
        log_alpha = common.compute_log_alpha(log_sigma2, theta, eps,
                                             clip_alpha)
        log_sigma2 = common.compute_log_sigma2(log_alpha, theta, eps)

    # Compute the mean and standard deviation of the distributions over the
    # activations
    mu_activation = tf.matmul(x,
                              theta,
                              transpose_a=transpose_a,
                              transpose_b=transpose_b)
    std_activation = tf.sqrt(
        tf.matmul(tf.square(x),
                  tf.exp(log_sigma2),
                  transpose_a=transpose_a,
                  transpose_b=transpose_b) + eps)

    output_shape = tf.shape(std_activation)
    return mu_activation + std_activation * tf.random_normal(output_shape)
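
A hedged usage sketch (names and shapes are illustrative, not from the original module):

# Hypothetical layer: 784 inputs -> 256 units, batch of 32.
x = tf.placeholder(tf.float32, [32, 784])
theta = tf.Variable(tf.random_normal([784, 256]))
log_sigma2 = tf.Variable(tf.fill([784, 256], -10.0))
# One noisy pre-activation sample per batch element, drawn via the
# local reparameterization trick described in the docstring.
pre_act = matmul_train(x, (theta, log_sigma2))  # shape [32, 256]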
Example #3
 def _sample(self):
     """ Sample from distribution, given observation """
     self.sampled_act = (self.means + tf.exp(self.log_vars / 2.0) *
                         tf.random_normal(shape=(self.act_dim, )))
Example #4
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

tf.set_random_seed(777)  # for reproducibility

# 1. Build the graph #

# H(x) = Wx + b
# X and Y data, H(x) = Y
#x_train = [1, 2, 3]
#y_train = [1, 2, 3]
X = tf.placeholder(tf.float32, shape=[None])
Y = tf.placeholder(tf.float32, shape=[None])
#                                   1-D; any values can be fed in.

W = tf.Variable(tf.random_normal([1]), name="weight")
b = tf.Variable(tf.random_normal([1]), name="bias")
#                                rank

# Our hypothesis XW+b
hypothesis = X * W + b

# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

#t = [1. , 2. , 3. , 4.]
#tf.reduce_mean(t) ==> 2.5, an op that computes the mean

# GradientDescent (Minimize/optimizer)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cost)
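
A minimal training loop for the graph above (a sketch; the commented-out x_train/y_train lists are reused as feed data):

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(2001):
    cost_val, _ = sess.run([cost, train],
                           feed_dict={X: [1, 2, 3], Y: [1, 2, 3]})
    if step % 200 == 0:
        print(step, cost_val, sess.run(W), sess.run(b))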
Example #5
def broadcast_matmul_train(x,
                           variational_params,
                           clip_alpha=None,
                           eps=common.EPSILON):
    R"""Training computation for VD matrix multiplication with N input matrices.

  Multiplies a 3D tensor `x` with a set of 2D parameters. Each 2D matrix
  `x[i, :, :]` in the input tensor is multiplied independently with the
  parameters, resulting in a 3D output tensor with shape
  `x.shape[:2] + variational_params[0].shape[1]`.

  Args:
    x: 3D Tensor representing the input batch.
    variational_params: 2-tuple of Tensors, where the first tensor is the
      \theta values and the second contains the log of the \sigma^2 values.
    clip_alpha: Int or None. If integer, we clip the log \alpha values to
      [-clip_alpha, clip_alpha]. If None, don't clip the values.
    eps: Small constant value to use in log and sqrt operations to avoid NaNs.

  Returns:
    Output Tensor of the batched matmul operation.

  Raises:
    RuntimeError: If the variational_params argument is not a 2-tuple.
  """
    theta, log_sigma2 = _verify_variational_params(variational_params)
    theta.get_shape().assert_has_rank(2)
    log_sigma2.get_shape().assert_has_rank(2)

    # The input tensor must be rank 2 or greater
    assert x.get_shape().ndims >= 2
    input_rank = x.get_shape().ndims

    if clip_alpha is not None:
        # Compute the log_alphas and then compute the
        # log_sigma2 again so that we can clip on the
        # log alpha magnitudes
        log_alpha = common.compute_log_alpha(log_sigma2, theta, eps,
                                             clip_alpha)
        log_sigma2 = common.compute_log_sigma2(log_alpha, theta, eps)

    # Compute the mean and standard deviation of the distributions over the
    # activations
    mu_activation = tf.tensordot(x, theta, [[input_rank - 1], [0]])

    var_activation = tf.tensordot(tf.square(x), tf.exp(log_sigma2),
                                  [[input_rank - 1], [0]])
    std_activation = tf.sqrt(var_activation + eps)

    # Reshape the output back to the rank of the input
    input_shape = x.get_shape().as_list()
    weight_shape = theta.get_shape().as_list()
    output_shape = input_shape[:-1] + [weight_shape[1]]
    mu_activation.set_shape(output_shape)
    std_activation.set_shape(output_shape)

    # NOTE: We sample noise for each weight in theta, which will be shared by
    # each matrix product that was done. This is equivalent to sampling the same
    # set of weights for all matrix products done by this op in an iteration.
    # The element-wise multiply below broadcasts.
    num_pad_dims = len(output_shape) - 2
    padding = [tf.constant(1, dtype=tf.int32) for _ in range(num_pad_dims)]

    # NOTE: On GPU, the first dim may not be defined w/ the Transformer. Create
    # a tf.Tensor from the list shape and TF should match the first dim
    # appropriately
    batch_size = tf.shape(x)[0]
    data_dim = tf.shape(theta)[-1]
    noise_shape = tf.stack([batch_size] + padding + [data_dim], axis=0)

    output = mu_activation + std_activation * tf.random_normal(noise_shape)
    return output
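
Shape behavior, as a hedged sketch: a [batch, time, features] input against [features, units] parameters gives a [batch, time, units] output, with one noise sample per batch element shared across the time steps:

x = tf.placeholder(tf.float32, [8, 20, 64])        # [batch, time, features]
theta = tf.Variable(tf.random_normal([64, 128]))   # [features, units]
log_sigma2 = tf.Variable(tf.fill([64, 128], -10.0))
out = broadcast_matmul_train(x, (theta, log_sigma2))  # shape [8, 20, 128]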
Example #6
    def __init__(self, seq_length, emb_dim, hidden_dim, embeddings, emb_train):
        ## Define hyperparameters
        self.embedding_dim = emb_dim
        self.dim = hidden_dim
        self.sequence_length = seq_length

        ## Define the placeholders
        self.premise_x = tf.placeholder(tf.int32, [None, self.sequence_length])
        self.hypothesis_x = tf.placeholder(tf.int32,
                                           [None, self.sequence_length])
        self.y = tf.placeholder(tf.int32, [None])
        self.keep_rate_ph = tf.placeholder(tf.float32, [])

        ## Define parameters
        self.E = tf.Variable(embeddings, trainable=emb_train)

        self.W_mlp = tf.Variable(
            tf.random_normal([self.dim * 8, self.dim], stddev=0.1))
        self.b_mlp = tf.Variable(tf.random_normal([self.dim], stddev=0.1))

        self.W_cl = tf.Variable(tf.random_normal([self.dim, 3], stddev=0.1))
        self.b_cl = tf.Variable(tf.random_normal([3], stddev=0.1))

        ## Function for embedding lookup and dropout at embedding layer
        def emb_drop(x):
            emb = tf.nn.embedding_lookup(self.E, x)
            emb_drop = tf.nn.dropout(emb, self.keep_rate_ph)
            return emb_drop

        # Get lengths of unpadded sentences
        prem_seq_lengths, mask_prem = blocks.length(self.premise_x)
        hyp_seq_lengths, mask_hyp = blocks.length(self.hypothesis_x)

        ### First biLSTM layer ###

        premise_in = emb_drop(self.premise_x)
        hypothesis_in = emb_drop(self.hypothesis_x)

        premise_outs, c1 = blocks.biLSTM(premise_in,
                                         dim=self.dim,
                                         seq_len=prem_seq_lengths,
                                         name='premise')
        hypothesis_outs, c2 = blocks.biLSTM(hypothesis_in,
                                            dim=self.dim,
                                            seq_len=hyp_seq_lengths,
                                            name='hypothesis')

        premise_bi = tf.concat(premise_outs, axis=2)
        hypothesis_bi = tf.concat(hypothesis_outs, axis=2)

        premise_list = tf.unstack(premise_bi, axis=1)
        hypothesis_list = tf.unstack(hypothesis_bi, axis=1)

        ### Attention ###

        scores_all = []
        premise_attn = []
        alphas = []

        for i in range(self.sequence_length):

            scores_i_list = []
            for j in range(self.sequence_length):
                score_ij = tf.reduce_sum(tf.multiply(premise_list[i],
                                                     hypothesis_list[j]),
                                         1,
                                         keep_dims=True)
                scores_i_list.append(score_ij)

            scores_i = tf.stack(scores_i_list, axis=1)
            alpha_i = blocks.masked_softmax(scores_i, mask_hyp)
            a_tilde_i = tf.reduce_sum(tf.multiply(alpha_i, hypothesis_bi), 1)
            premise_attn.append(a_tilde_i)

            scores_all.append(scores_i)
            alphas.append(alpha_i)

        scores_stack = tf.stack(scores_all, axis=2)
        scores_list = tf.unstack(scores_stack, axis=1)

        hypothesis_attn = []
        betas = []
        for j in range(self.sequence_length):
            scores_j = scores_list[j]
            beta_j = blocks.masked_softmax(scores_j, mask_prem)
            b_tilde_j = tf.reduce_sum(tf.multiply(beta_j, premise_bi), 1)
            hypothesis_attn.append(b_tilde_j)

            betas.append(beta_j)

        # Combine the attention-weighted sentence representations into one tensor
        premise_attns = tf.stack(premise_attn, axis=1)
        hypothesis_attns = tf.stack(hypothesis_attn, axis=1)

        # For making attention plots,
        self.alpha_s = tf.stack(alphas, axis=2)
        self.beta_s = tf.stack(betas, axis=2)

        ### Subcomponent Inference ###

        prem_diff = tf.subtract(premise_bi, premise_attns)
        prem_mul = tf.multiply(premise_bi, premise_attns)
        hyp_diff = tf.subtract(hypothesis_bi, hypothesis_attns)
        hyp_mul = tf.multiply(hypothesis_bi, hypothesis_attns)

        m_a = tf.concat([premise_bi, premise_attns, prem_diff, prem_mul], 2)
        m_b = tf.concat([hypothesis_bi, hypothesis_attns, hyp_diff, hyp_mul],
                        2)

        ### Inference Composition ###

        v1_outs, c3 = blocks.biLSTM(m_a,
                                    dim=self.dim,
                                    seq_len=prem_seq_lengths,
                                    name='v1')
        v2_outs, c4 = blocks.biLSTM(m_b,
                                    dim=self.dim,
                                    seq_len=hyp_seq_lengths,
                                    name='v2')

        v1_bi = tf.concat(v1_outs, axis=2)
        v2_bi = tf.concat(v2_outs, axis=2)

        ### Pooling Layer ###

        v_1_sum = tf.reduce_sum(v1_bi, 1)
        v_1_ave = tf.div(
            v_1_sum, tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1))

        v_2_sum = tf.reduce_sum(v2_bi, 1)
        v_2_ave = tf.div(
            v_2_sum, tf.expand_dims(tf.cast(hyp_seq_lengths, tf.float32), -1))

        v_1_max = tf.reduce_max(v1_bi, 1)
        v_2_max = tf.reduce_max(v2_bi, 1)

        v = tf.concat([v_1_ave, v_2_ave, v_1_max, v_2_max], 1)

        # MLP layer
        h_mlp = tf.nn.tanh(tf.matmul(v, self.W_mlp) + self.b_mlp)

        # Dropout applied to classifier
        h_drop = tf.nn.dropout(h_mlp, self.keep_rate_ph)

        # Get prediction
        self.logits = tf.matmul(h_drop, self.W_cl) + self.b_cl

        # Define the cost function
        self.total_cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y,
                                                           logits=self.logits))
Example #7
def compute_mel_filterbank_features(
    waveforms,
    sample_rate=16000, dither=1.0 / np.iinfo(np.int16).max, preemphasis=0.97,
    frame_length=25, frame_step=10, fft_length=None,
    window_fn=functools.partial(tf.signal.hann_window, periodic=True),
    lower_edge_hertz=80.0, upper_edge_hertz=7600.0, num_mel_bins=80,
    log_noise_floor=1e-3, apply_mask=True):
  """Implement mel-filterbank extraction using tf ops.

  Args:
    waveforms: float32 tensor with shape [batch_size, max_len]
    sample_rate: sampling rate of the waveform
    dither: stddev of Gaussian noise added to waveform to prevent quantization
      artefacts
    preemphasis: waveform high-pass filtering constant
    frame_length: frame length in ms
    frame_step: frame step in ms
    fft_length: number of fft bins
    window_fn: windowing function
    lower_edge_hertz: lowest frequency of the filterbank
    upper_edge_hertz: highest frequency of the filterbank
    num_mel_bins: filterbank size
    log_noise_floor: clip small values to prevent numeric overflow in log
    apply_mask: When working on a batch of samples, set padding frames to zero
  Returns:
    filterbanks: a float32 tensor with shape [batch_size, len, num_bins, 1]
  """
  # `stfts` is a complex64 Tensor representing the short-time Fourier
  # Transform of each signal in `signals`. Its shape is
  # [batch_size, ?, fft_unique_bins]
  # where fft_unique_bins = fft_length // 2 + 1

  # Find the waveform length: the largest index for which the value is != 0.
  # Note that waveform samples that are exactly 0.0 are quite common, so
  # simply doing sum(waveforms != 0, axis=-1) will not work correctly.
  wav_lens = tf.reduce_max(
      tf.expand_dims(tf.range(tf.shape(waveforms)[1]), 0) *
      tf.to_int32(tf.not_equal(waveforms, 0.0)),
      axis=-1) + 1
  if dither > 0:
    waveforms += tf.random_normal(tf.shape(waveforms), stddev=dither)
  if preemphasis > 0:
    waveforms = waveforms[:, 1:] - preemphasis * waveforms[:, :-1]
    wav_lens -= 1
  frame_length = int(frame_length * sample_rate / 1e3)
  frame_step = int(frame_step * sample_rate / 1e3)
  if fft_length is None:
    fft_length = int(2**(np.ceil(np.log2(frame_length))))

  stfts = tf.signal.stft(
      waveforms,
      frame_length=frame_length,
      frame_step=frame_step,
      fft_length=fft_length,
      window_fn=window_fn,
      pad_end=True)

  stft_lens = (wav_lens + (frame_step - 1)) // frame_step
  masks = tf.to_float(tf.less_equal(
      tf.expand_dims(tf.range(tf.shape(stfts)[1]), 0),
      tf.expand_dims(stft_lens, 1)))

  # An energy spectrogram is the magnitude of the complex-valued STFT.
  # A float32 Tensor of shape [batch_size, ?, 257].
  magnitude_spectrograms = tf.abs(stfts)

  # Warp the linear-scale, magnitude spectrograms into the mel-scale.
  num_spectrogram_bins = magnitude_spectrograms.shape[-1].value
  linear_to_mel_weight_matrix = (
      tf.signal.linear_to_mel_weight_matrix(
          num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
          upper_edge_hertz))
  mel_spectrograms = tf.tensordot(
      magnitude_spectrograms, linear_to_mel_weight_matrix, 1)
  # Note: Shape inference for tensordot does not currently handle this case.
  mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
      linear_to_mel_weight_matrix.shape[-1:]))

  log_mel_sgram = tf.log(tf.maximum(log_noise_floor, mel_spectrograms))

  if apply_mask:
    log_mel_sgram *= tf.expand_dims(tf.to_float(masks), -1)

  return tf.expand_dims(log_mel_sgram, -1, name="mel_sgrams")
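
A hedged usage sketch on random waveforms (one second at the default 16 kHz sampling rate):

waveforms = tf.random_normal([4, 16000])  # [batch_size, max_len]
mel = compute_mel_filterbank_features(waveforms)
# mel: [4, num_frames, 80, 1] -- 80 mel bins plus a trailing channel dim.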
Example #8
x_train = np.array([mnist[i].features for i in range(0, int(len(mnist) * 0.8))])
x_test = np.array([mnist[i].features for i in range(int(len(mnist) * 0.8), len(mnist))])
y_train = np.array([mnist[i].label for i in range(0, int(len(mnist) * 0.8))])
y_test = np.array([mnist[i].label for i in range(int(len(mnist) * 0.8), len(mnist))])

# Parameters
learning_rate = 0.0005
training_epochs = 2500
batch_size = 128
display_step = 10

x = tf.placeholder(tf.float32, [None, 20])
y = tf.placeholder(tf.float32, [None, 2])

W1 = tf.Variable(tf.random_normal([20, 10], stddev=0.03), name='W1')
b1 = tf.Variable(tf.random_normal([10]), name='b1')
W2 = tf.Variable(tf.random_normal([10, 2], stddev=0.03), name='W2')
b2 = tf.Variable(tf.random_normal([2]), name='b2')

hidden_out = tf.add(tf.matmul(x, W1), b1)
hidden_out = tf.nn.relu(hidden_out)

logits = tf.matmul(hidden_out, W2) + b2

pred = tf.nn.softmax(tf.add(tf.matmul(hidden_out, W2), b2))

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
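
The snippet stops at the cost; a minimal sketch of the missing optimizer and training loop (the Adam choice is illustrative, and y_train/y_test are assumed to be one-hot with two columns, matching the placeholder):

train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        _, c = sess.run([train_op, cost], feed_dict={x: x_train, y: y_train})
        if epoch % display_step == 0:
            print("epoch", epoch, "cost", c)
    print("test accuracy:", sess.run(accuracy, feed_dict={x: x_test, y: y_test}))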
Example #9
 def _weight_variable(self, shape, name='weights'):
     return tf.Variable(tf.random_normal(shape), name=name)
Example #10
    def build_model(self, hps):
        """Define model architecture."""
        if hps.is_training:
            self.global_step = tf.Variable(0, name='global_step', trainable=False)

        if hps.dec_model == 'lstm':
            cell_fn = rnn.LSTMCell
        elif hps.dec_model == 'layer_norm':
            cell_fn = rnn.LayerNormLSTMCell
        elif hps.dec_model == 'hyper':
            cell_fn = rnn.HyperLSTMCell
        else:
            assert False, 'please choose a respectable cell'

        if hps.enc_model == 'lstm':
            enc_cell_fn = rnn.LSTMCell
        elif hps.enc_model == 'layer_norm':
            enc_cell_fn = rnn.LayerNormLSTMCell
        elif hps.enc_model == 'hyper':
            enc_cell_fn = rnn.HyperLSTMCell
        else:
            assert False, 'please choose a respectable cell'

        use_recurrent_dropout = self.hps.use_recurrent_dropout
        use_input_dropout = self.hps.use_input_dropout
        use_output_dropout = self.hps.use_output_dropout

        cell = cell_fn(
            hps.dec_rnn_size,
            use_recurrent_dropout=use_recurrent_dropout,
            dropout_keep_prob=self.hps.recurrent_dropout_prob)

        if hps.conditional:  # vae mode:
            if hps.enc_model == 'hyper':
                self.enc_cell_fw = enc_cell_fn(
                    hps.enc_rnn_size,
                    use_recurrent_dropout=use_recurrent_dropout,
                    dropout_keep_prob=self.hps.recurrent_dropout_prob)
                self.enc_cell_bw = enc_cell_fn(
                    hps.enc_rnn_size,
                    use_recurrent_dropout=use_recurrent_dropout,
                    dropout_keep_prob=self.hps.recurrent_dropout_prob)
            else:
                self.enc_cell_fw = enc_cell_fn(
                    hps.enc_rnn_size,
                    use_recurrent_dropout=use_recurrent_dropout,
                    dropout_keep_prob=self.hps.recurrent_dropout_prob)
                self.enc_cell_bw = enc_cell_fn(
                    hps.enc_rnn_size,
                    use_recurrent_dropout=use_recurrent_dropout,
                    dropout_keep_prob=self.hps.recurrent_dropout_prob)

        # dropout:
        tf.logging.info('Input dropout mode = %s.', use_input_dropout)
        tf.logging.info('Output dropout mode = %s.', use_output_dropout)
        tf.logging.info('Recurrent dropout mode = %s.', use_recurrent_dropout)
        if use_input_dropout:
            tf.logging.info('Dropout to input w/ keep_prob = %4.4f.',
                            self.hps.input_dropout_prob)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, input_keep_prob=self.hps.input_dropout_prob)
        if use_output_dropout:
            tf.logging.info('Dropout to output w/ keep_prob = %4.4f.',
                            self.hps.output_dropout_prob)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=self.hps.output_dropout_prob)
        self.cell = cell

        self.sequence_lengths = tf.placeholder(
            dtype=tf.int32, shape=[self.hps.batch_size])
        self.input_data = tf.placeholder(
            dtype=tf.float32,
            shape=[self.hps.batch_size, self.hps.max_seq_len + 1, 5])

        # The target/expected vectors of strokes
        self.output_x = self.input_data[:, 1:self.hps.max_seq_len + 1, :]
        # vectors of strokes to be fed to decoder (same as above, but lagged behind
        # one step to include initial dummy value of (0, 0, 1, 0, 0))
        self.input_x = self.input_data[:, :self.hps.max_seq_len, :]

        # either do vae-bit and get z, or do unconditional, decoder-only
        if hps.conditional:  # vae mode:
            self.mean, self.presig = self.encoder(self.output_x,
                                                  self.sequence_lengths)
            self.sigma = tf.exp(self.presig / 2.0)  # sigma > 0. div 2.0 -> sqrt.
            eps = tf.random_normal(
                (self.hps.batch_size, self.hps.z_size), 0.0, 1.0, dtype=tf.float32)
            self.batch_z = self.mean + tf.multiply(self.sigma, eps)
            # KL cost
            self.kl_cost = -0.5 * tf.reduce_mean(
                (1 + self.presig - tf.square(self.mean) - tf.exp(self.presig)))
            self.kl_cost = tf.maximum(self.kl_cost, self.hps.kl_tolerance)
            pre_tile_y = tf.reshape(self.batch_z,
                                    [self.hps.batch_size, 1, self.hps.z_size])
            overlay_x = tf.tile(pre_tile_y, [1, self.hps.max_seq_len, 1])
            actual_input_x = tf.concat([self.input_x, overlay_x], 2)
            self.initial_state = tf.nn.tanh(
                rnn.super_linear(
                    self.batch_z,
                    cell.state_size,
                    init_w='gaussian',
                    weight_start=0.001,
                    input_size=self.hps.z_size))
        else:  # unconditional, decoder-only generation
            self.batch_z = tf.zeros(
                (self.hps.batch_size, self.hps.z_size), dtype=tf.float32)
            self.kl_cost = tf.zeros([], dtype=tf.float32)
            actual_input_x = self.input_x
            self.initial_state = cell.zero_state(
                batch_size=hps.batch_size, dtype=tf.float32)

        self.num_mixture = hps.num_mixture

        # TODO(deck): Better understand this comment.
        # Number of outputs is 3 (one logit per pen state) plus 6 per mixture
        # component: mean_x, stdev_x, mean_y, stdev_y, correlation_xy, and the
        # mixture weight/probability (Pi_k)
        n_out = (3 + self.num_mixture * 6)

        with tf.variable_scope('RNN'):
            output_w = tf.get_variable('output_w', [self.hps.dec_rnn_size, n_out])
            output_b = tf.get_variable('output_b', [n_out])

        # decoder module of sketch-rnn is below
        output, last_state = tf.nn.dynamic_rnn(
            cell,
            actual_input_x,
            initial_state=self.initial_state,
            time_major=False,
            swap_memory=True,
            dtype=tf.float32,
            scope='RNN')

        output = tf.reshape(output, [-1, hps.dec_rnn_size])
        output = tf.nn.xw_plus_b(output, output_w, output_b)
        self.final_state = last_state

        # NB: the below are inner functions, not methods of Model
        def tf_2d_normal(x1, x2, mu1, mu2, s1, s2, rho):
            """Returns result of eq # 24 of http://arxiv.org/abs/1308.0850."""
            norm1 = tf.subtract(x1, mu1)
            norm2 = tf.subtract(x2, mu2)
            s1s2 = tf.multiply(s1, s2)
            # eq 25
            z = (tf.square(tf.div(norm1, s1)) + tf.square(tf.div(norm2, s2)) -
                 2 * tf.div(tf.multiply(rho, tf.multiply(norm1, norm2)), s1s2))
            neg_rho = 1 - tf.square(rho)
            result = tf.exp(tf.div(-z, 2 * neg_rho))
            denom = 2 * np.pi * tf.multiply(s1s2, tf.sqrt(neg_rho))
            result = tf.div(result, denom)
            return result

        def get_lossfunc(z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr,
                         z_pen_logits, x1_data, x2_data, pen_data):
            """Returns a loss fn based on eq #26 of http://arxiv.org/abs/1308.0850."""
            # This represents the L_R only (i.e. does not include the KL loss term).

            result0 = tf_2d_normal(x1_data, x2_data, z_mu1, z_mu2, z_sigma1, z_sigma2,
                                   z_corr)
            epsilon = 1e-6
            # result1 is the loss wrt pen offset (L_s in equation 9 of
            # https://arxiv.org/pdf/1704.03477.pdf)
            result1 = tf.multiply(result0, z_pi)
            result1 = tf.reduce_sum(result1, 1, keep_dims=True)
            result1 = -tf.log(result1 + epsilon)  # avoid log(0)

            fs = 1.0 - pen_data[:, 2]  # use training data for this
            fs = tf.reshape(fs, [-1, 1])
            # Zero out loss terms beyond N_s, the last actual stroke
            result1 = tf.multiply(result1, fs)

            # result2: loss wrt pen state, (L_p in equation 9)
            result2 = tf.nn.softmax_cross_entropy_with_logits(
                labels=pen_data, logits=z_pen_logits)
            result2 = tf.reshape(result2, [-1, 1])
            if not self.hps.is_training:  # eval mode, mask eos columns
                result2 = tf.multiply(result2, fs)

            result = result1 + result2
            return result

        # below is where we need to do MDN (Mixture Density Network) splitting of
        # distribution params
        def get_mixture_coef(output):
            """Returns the tf slices containing mdn dist params."""
            # This uses eqns 18 -> 23 of http://arxiv.org/abs/1308.0850.
            z = output
            z_pen_logits = z[:, 0:3]  # pen states
            z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr = tf.split(z[:, 3:], 6, 1)

            # process output z's into MDN parameters

            # softmax all the pi's and pen states:
            z_pi = tf.nn.softmax(z_pi)
            z_pen = tf.nn.softmax(z_pen_logits)

            # exponentiate the sigmas and also make corr between -1 and 1.
            z_sigma1 = tf.exp(z_sigma1)
            z_sigma2 = tf.exp(z_sigma2)
            z_corr = tf.tanh(z_corr)

            r = [z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr, z_pen, z_pen_logits]
            return r

        out = get_mixture_coef(output)
        [o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr, o_pen, o_pen_logits] = out

        self.pi = o_pi
        self.mu1 = o_mu1
        self.mu2 = o_mu2
        self.sigma1 = o_sigma1
        self.sigma2 = o_sigma2
        self.corr = o_corr
        self.pen_logits = o_pen_logits
        # pen state probabilities (result of applying softmax to self.pen_logits)
        self.pen = o_pen

        # reshape target data so that it is compatible with prediction shape
        target = tf.reshape(self.output_x, [-1, 5])
        [x1_data, x2_data, eos_data, eoc_data, cont_data] = tf.split(target, 5, 1)
        pen_data = tf.concat([eos_data, eoc_data, cont_data], 1)

        lossfunc = get_lossfunc(o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr,
                                o_pen_logits, x1_data, x2_data, pen_data)

        self.r_cost = tf.reduce_mean(lossfunc)

        if self.hps.is_training:
            self.lr = tf.Variable(self.hps.learning_rate, trainable=False)
            optimizer = tf.train.AdamOptimizer(self.lr)

            self.kl_weight = tf.Variable(self.hps.kl_weight_start, trainable=False)
            self.cost = self.r_cost + self.kl_cost * self.kl_weight

            gvs = optimizer.compute_gradients(self.cost)
            g = self.hps.grad_clip
            capped_gvs = [(tf.clip_by_value(grad, -g, g), var) for grad, var in gvs]
            self.train_op = optimizer.apply_gradients(
                capped_gvs, global_step=self.global_step, name='train_step')
Example #11
def sample_from_latent_distribution(z_mean, z_logvar):
    """Sample from the encoder distribution with reparametrization trick."""
    return tf.add(z_mean,
                  tf.exp(z_logvar / 2) *
                  tf.random_normal(tf.shape(z_mean), 0, 1),
                  name="latent")
Example #12
# [other, mammal, bird] : [6, 3] -> one-hot encoding
y_data = np.array([
    [1, 0, 0],  # other[0]
    [0, 1, 0],  # mammal[1]
    [0, 0, 1],  # bird[2]
    [1, 0, 0],
    [1, 0, 0],
    [0, 0, 1]
])

# 2. Define the X and Y variables
X = tf.placeholder(dtype=tf.float32, shape=[None, 2])  # [observations, inputs]
Y = tf.placeholder(dtype=tf.float32, shape=[None, 3])  # [observations, outputs]

# 3. Define the w and b variables: initial values drawn at random
w = tf.Variable(tf.random_normal([2, 3]))  # [inputs, outputs]
b = tf.Variable(tf.random_normal([3]))  # [outputs]

# 4. Softmax classifier
# 1) Regression equation: predictions
model = tf.matmul(X, w) + b  # regression model: [6, 3]

# softmax(predictions)
softmax = tf.nn.softmax(model)  # apply the activation (0-1): y1: 0.8, y2: 0.1, y3: 0.1

# 2) Loss function
# Method 1: cross entropy: -sum(Y * log(model))
#loss = -tf.reduce_mean(Y * tf.log(softmax) + (1 - Y) * tf.log(1 - softmax))

# Method 2: softmax + cross entropy
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=Y, logits=model))
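
A minimal training sketch for this classifier (x_data, the 6x2 input matrix paired with y_data, is assumed to be defined in the elided part of the snippet):

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
    print(sess.run(softmax, feed_dict={X: x_data}))  # per-row class probabilities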
Example #13
            # x = tf.layers.batch_normalization(x, training=training)
            # x = tf.nn.relu(x)
            # x = tf.layers.conv2d(x, 1, 1, 1, "same", activation=tf.nn.tanh)
            x = tf.layers.dense(input, 1024, use_bias=False)
            x = tf.layers.batch_normalization(x, training=training,)
            x = tf.nn.relu(x)
            x = tf.layers.dense(x, 7*7*128, use_bias=False)
            x = tf.layers.batch_normalization(x, training=training)
            x = tf.nn.relu(x)
            x = tf.reshape(x, [-1, 7, 7, 128])
            x = tf.layers.conv2d_transpose(x, 64, 4, 2, "same", use_bias=False)
            x = tf.layers.batch_normalization(x, training=training)
            x = tf.nn.relu(x)
            x = tf.layers.conv2d_transpose(x, 1, 4, 2, "same", activation=tf.nn.tanh)
        return x

if __name__ == "__main__":
    import numpy as np
    tf.disable_v2_behavior()
    with tf.variable_scope("test"):
        x = tf.random_normal([10, 5], name="randn")
        x = tf.layers.batch_normalization(x)
        x = tf.layers.batch_normalization(x)
    ss = tf.Session()
    ss.run(tf.global_variables_initializer())
    c = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="test")
    # with tf.variable_scope("test", reuse=True):
    #     v=tf.get_variable("batch_norm/moving_mean")
    # print(ss.run(v))
    print(c)
Example #14
users = user_book_matrix.index.tolist()
books = user_book_matrix.columns.tolist()
user_book_matrix = user_book_matrix.values  # .as_matrix() was removed from pandas

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

num_input = combined['Book-Title'].nunique()
num_hidden_1 = 10
num_hidden_2 = 5

X = tf.placeholder(tf.float64, [None, num_input])

weights = {
    'encoder_h1':
    tf.Variable(tf.random_normal([num_input, num_hidden_1], dtype=tf.float64)),
    'encoder_h2':
    tf.Variable(
        tf.random_normal([num_hidden_1, num_hidden_2], dtype=tf.float64)),
    'decoder_h1':
    tf.Variable(
        tf.random_normal([num_hidden_2, num_hidden_1], dtype=tf.float64)),
    'decoder_h2':
    tf.Variable(tf.random_normal([num_hidden_1, num_input], dtype=tf.float64)),
}

biases = {
    'encoder_b1':
    tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'encoder_b2':
    tf.Variable(tf.random_normal([num_hidden_2], dtype=tf.float64)),
    # Decoder biases, mirroring the weights dict above (the snippet cut off here)
    'decoder_b1':
    tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'decoder_b2':
    tf.Variable(tf.random_normal([num_input], dtype=tf.float64)),
}
Example #15
def sampling(output):
    # Split the network output into mean and (pre-softplus) scale parameters
    mu, logstd = tf.split(output, num_or_size_splits=2, axis=-1)
    sigma = tf.nn.softplus(logstd)  # ensure a positive standard deviation
    ws = mu + tf.random_normal(tf.shape(mu)) * sigma  # reparameterized sample
    return ws, mu, sigma
Example #16
import numpy as np
import tensorflow.compat.v1 as tf  # assumed import (tf is used below)
tf.disable_v2_behavior()

n_f = 10  # number of input features
n_d_n = 3  # number of dense neurons

x = tf.placeholder(tf.float32, (None, n_f))

b = tf.Variable(tf.zeros([n_d_n]))

w = tf.Variable(tf.random_normal([n_f, n_d_n]))

#y=mx+c
xw = tf.matmul(x, w)

z = tf.add(xw, b)

#activation function

a = tf.sigmoid(z)
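
Running the single layer on a random input, as a sketch:

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    layer_out = sess.run(a, feed_dict={x: np.random.random([1, n_f])})
    print(layer_out)  # sigmoid activations, shape (1, n_d_n)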
Example #17
# Placeholders assumed by the feed_dict below (the snippet begins mid-file)
A = tf.placeholder(tf.float32, (5, 5))
v = tf.placeholder(tf.float32, (5, 1))
w = tf.matmul(A, v)

with tf.Session() as session:
    output = session.run(w,
                         feed_dict={
                             A: np.random.randn(5, 5),
                             v: np.random.randn(5, 1)
                         })

    print(output, type(output))

# TensorFlow variables are like Theano shared variables.
# But Theano variables are like TensorFlow placeholders.

shape = (2, 2)
x = tf.Variable(tf.random_normal(shape))
t = tf.Variable(0)

init = tf.global_variables_initializer()

with tf.Session() as session:
    out = session.run(init)
    print(out)

    print(x.eval())
    print(t.eval())

# find the minimum of a cost function
u = tf.Variable(20.0)
cost = u * u + u + 1.0
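
Minimizing it with gradient descent, as a sketch continuing the snippet (the analytic minimum of u*u + u + 1 is at u = -0.5):

train_op = tf.train.GradientDescentOptimizer(0.3).minimize(cost)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for i in range(300):
        session.run(train_op)
    print(u.eval())  # approaches -0.5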
Example #18
# Assumed imports (the snippet begins mid-file)
import numpy as np
import tensorflow.compat.v1 as tf
from pandas import read_csv
tf.disable_v2_behavior()

data = read_csv('./model/TCD/tcd.csv', sep=',', encoding='CP949')

xy = np.array(data, dtype=np.float32)

# Input features (all columns except the last)
x_data = xy[:, 0:-1]

# Target value (BMI, the last column)
y_data = xy[:, [-1]]

# Set up the placeholders
X = tf.placeholder(tf.float32, shape=[None, 25])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([25, 1]), name='DN')
# Weight variable created with random values of shape [25, 1]
b = tf.Variable(tf.random_normal([1]), name='bias')

hypothesis = tf.matmul(X, W) + b
# Hypothesis for computing the predicted values

cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Define the cost function

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.000005)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
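
A minimal training loop continuing the snippet:

for step in range(10001):
    cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
    if step % 1000 == 0:
        print(step, "cost:", cost_val)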
Example #19
 def action_sample(self):
     return self.p + 1.0 * (tf.exp(self.logstd) *
                            tf.random_normal(tf.shape(self.p)))
Example #20
def lenet(use_pretrained=False):  # modify from lenet model

    if not use_pretrained:
        # Random initialize
        weights = {
            'conv1':
            tf.get_variable('LN_conv1_w', [5, 5, 3, 64],
                            initializer=tf.uniform_unit_scaling_initializer()),
            'conv2':
            tf.get_variable('LN_conv2_w', [5, 5, 64, 128],
                            initializer=tf.uniform_unit_scaling_initializer()),
            'ip1':
            tf.get_variable('LN_ip1_w', [5 * 5 * 128, 1024],
                            initializer=tf.uniform_unit_scaling_initializer()),
            'ip2':
            tf.get_variable('LN_ip2_w', [1024, 10],
                            initializer=tf.uniform_unit_scaling_initializer())
        }

        biases = {
            'conv1':
            tf.Variable(tf.random_normal(shape=[64], stddev=0.5),
                        name='LN_conv1_b'),
            'conv2':
            tf.Variable(tf.random_normal(shape=[128], stddev=0.5),
                        name='LN_conv2_b'),
            'ip1':
            tf.Variable(tf.random_normal(shape=[1024], stddev=0.5),
                        name='LN_ip1_b'),
            'ip2':
            tf.Variable(tf.random_normal(shape=[10], stddev=0.5),
                        name='LN_ip2_b')
        }
    else:
        # initialized by pre-trained weight
        npyfile = np.load('student.npy')
        npyfile = npyfile.item()
        weights = {
            'conv1': tf.Variable(npyfile['conv1']['weights'],
                                 name='LN_conv1_w'),
            'conv2': tf.Variable(npyfile['conv2']['weights'],
                                 name='LN_conv2_w'),
            'ip1': tf.Variable(npyfile['ip1']['weights'], name='LN_ip1_w'),
            'ip2': tf.Variable(npyfile['ip2']['weights'], name='LN_ip2_w'),
        }

        biases = {
            'conv1': tf.Variable(npyfile['conv1']['biases'],
                                 name='LN_conv1_b'),
            'conv2': tf.Variable(npyfile['conv2']['biases'],
                                 name='LN_conv2_b'),
            'ip1': tf.Variable(npyfile['ip1']['biases'], name='LN_ip1_b'),
            'ip2': tf.Variable(npyfile['ip2']['biases'], name='LN_ip2_b'),
        }

    conv1 = conv(x, weights['conv1'], biases['conv1'], padding='VALID')
    pool1 = maxpool2d(conv1, k=2, s=2)
    conv2 = conv(pool1, weights['conv2'], biases['conv2'], padding='VALID')
    pool2 = maxpool2d(conv2, k=2, s=2, padding='VALID')

    ip1 = tf.reshape(pool2, [-1, weights['ip1'].get_shape().as_list()[0]])
    ip1 = tf.add(tf.matmul(ip1, weights['ip1']), biases['ip1'])
    ip1_relu = tf.nn.relu(ip1)
    ip2 = tf.add(tf.matmul(ip1_relu, weights['ip2']), biases['ip2'])
    return ip2
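
`conv` and `maxpool2d` are helpers defined elsewhere in the file; a plausible sketch of them (an assumption, not the original definitions):

def conv(x, w, b, strides=1, padding='SAME'):
    # 2-D convolution, bias add, then ReLU
    out = tf.nn.conv2d(x, w, strides=[1, strides, strides, 1], padding=padding)
    return tf.nn.relu(tf.nn.bias_add(out, b))

def maxpool2d(x, k=2, s=2, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, s, s, 1],
                          padding=padding)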
Example #21
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # needed for the placeholder API used below
tf.set_random_seed(777)

x1_data = [73., 93., 89., 96., 73.]
x2_data = [80., 88., 91., 98., 77.]
x3_data = [75., 93., 90., 100., 70.]

y_data = [152., 185., 180., 196., 142.]

x1 = tf.placeholder(tf.float32)
x2 = tf.placeholder(tf.float32)
x3 = tf.placeholder(tf.float32)

Y = tf.placeholder(tf.float32)

w1 = tf.Variable(tf.random_normal([1]), name="weight1")
w2 = tf.Variable(tf.random_normal([1]), name="weight2")
w3 = tf.Variable(tf.random_normal([1]), name="weight3")
b = tf.Variable(tf.random_normal([1]), name="bias")

hypothesis = x1 * w1 + x2 * w2 + x3 * w3 + b

# cost / loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)    #  0.00001
train = optimizer.minimize(cost)

sess = tf.Session()

# Initialize the variables
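
The snippet cuts off here; a sketch of the intended continuation:

sess.run(tf.global_variables_initializer())

for step in range(2001):
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train],
        feed_dict={x1: x1_data, x2: x2_data, x3: x3_data, Y: y_data})
    if step % 100 == 0:
        print(step, "cost:", cost_val, "prediction:", hy_val)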
Example #22
def train(model_path, learning_rate, epoch, noisy=False):
    total_epoch = epoch
    teacher = nin()
    student = lenet()
    if noisy:
        drop_scale = 1 / Nratio
        noisy_mask = tf.nn.dropout(tf.constant(
            np.float32(np.ones((batch_size, 1))) / drop_scale),
                                   keep_prob=Nratio)  #(batchsize,1)
        gaussian = tf.random_normal(shape=[batch_size, 1],
                                    mean=0.0,
                                    stddev=Nsigma)
        noisy = tf.multiply(noisy_mask, gaussian)
        #noisy_add = tf.add(tf.constant(np.float32(np.ones((batch_size,1)))), noisy)
        teacher = tf.multiply(teacher,
                              tf.tile(noisy, tf.constant([1, 10])))  # (batchsize, 10)
        #teacher = tf.add(teacher, tf.tile(noisy,tf.constant([1,10])))
        print(bcolors.G + "prepare for training, noisy mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size
    elif KD == True:  # correct Hinton method at 2017.1.3
        print(bcolors.G + "prepare for training, knowledge distilling mode" +
              bcolors.END)
        one_hot = tf.one_hot(y, n_classes, 1.0, 0.0)
        #one_hot = tf.cast(one_hot_int, tf.float32)
        teacher_tau = tf.scalar_mul(1.0 / tau, teacher)
        student_tau = tf.scalar_mul(1.0 / tau, student)
        objective1 = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=one_hot, logits=student_tau)
        objective2 = tf.scalar_mul(0.5, tf.square(student_tau - teacher_tau))
        tf_loss = (lamda * tf.reduce_sum(objective1) +
                   (1 - lamda) * tf.reduce_sum(objective2)) / batch_size
    else:
        print(bcolors.G + "prepare for training, NIPS2014 mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size

    optimizer1 = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(tf_loss)
    optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate /
                                        10).minimize(tf_loss)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options, allow_soft_placement=True))
    tf.global_variables_initializer().run()
    with tf.device('/cpu:0'):
        saver = tf.train.Saver(max_to_keep=100)
        #saver.restore(sess, os.path.join(model_path,'model-99')
    data, label = read_cifar10('train')
    index = np.array(range(len(data)))  # index randomly ordered
    mean = cal_mean()
    begin = time.time()
    iterations = len(data) // batch_size
    decay_step = int(total_epoch * 0.8)
    cnt = 0
    dropout_rate = dropout
    print(bcolors.G + "number of iterations (per epoch) =" +
          str(len(data) / batch_size) + bcolors.END)
    for i in range(total_epoch):
        np.random.shuffle(index)
        cost_sum = 0
        for j in range(iterations):
            batch_x = np.float32(
                data[index[j * batch_size:(j + 1) * batch_size]]) - mean
            batch_y = np.squeeze(
                np.float32(label[index[j * batch_size:(j + 1) * batch_size]]))
            if cnt // decay_step == 0:
                lr = learning_rate
                _, cost = sess.run([optimizer1, tf_loss],
                                   feed_dict={
                                       x: batch_x,
                                       y: batch_y,
                                       keep_prob: 1 - dropout_rate
                                   })
            elif cnt // decay_step == 1:
                lr = learning_rate / 10
                _, cost = sess.run([optimizer2, tf_loss],
                                   feed_dict={
                                       x: batch_x,
                                       y: batch_y,
                                       keep_prob: 1 - dropout_rate
                                   })
            cost_sum += cost
            #pdb.set_trace()
            #if (j % int(iterations*0.25) == 0):
            #    print(("epoch %d-iter %d, cost = %f , avg-cost = %f"%(i, j, cost, cost/n_classes))
            #    sys.stdout.flush()
        cnt += 1
        avg_time = time.time() - begin
        print(
            "epoch %d - avg. %f seconds in each epoch, lr = %.0e, cost = %f , avg-cost-per-logits = %f"
            % (i, avg_time / cnt, lr, cost_sum,
               cost_sum / iterations / n_classes))
        if np.mod(i + 1, 10) == 0:
            print("Epoch ", i + 1, " is done. Saving the model ...")
            with tf.device('/cpu:0'):
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                saver.save(sess,
                           os.path.join(model_path, 'model'),
                           global_step=i)
        sys.stdout.flush()
Example #23
import numpy as np
import tensorflow.compat.v1 as tf  # assumed import (tf is used below)
tf.disable_v2_behavior()

path = 'https://raw.githubusercontent.com/hunkim/DeepLearningZeroToAll/master/data-04-zoo.csv'
xy = np.genfromtxt(path, delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

nb_classes = 7  # 0 ~ 6

X = tf.placeholder(tf.float32, [None, 16])
Y = tf.placeholder(tf.int32, [None, 1])   # 0 ~ 6

Y_one_hot = tf.one_hot(Y, nb_classes)   # one hot
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])

W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

# tf.nn.softmax compute softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

# Cross entropy cost/loss
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_one_hot)
cost = tf.reduce_mean(cost_i)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

prediction = tf.argmax(hypothesis, 1)
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
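
A training sketch continuing the snippet (y_data holds integer class labels, matching the Y placeholder):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        _, acc = sess.run([optimizer, accuracy],
                          feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, "accuracy:", acc)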
Example #24
train_x,train_y,test_x,test_y = create_feature_sets_and_labels('left.txt','right.txt')

n_nodes_hl1 = 1500
n_nodes_hl2 = 1500
n_nodes_hl3 = 1500

n_classes = 2
batch_size = 100
hm_epochs = 10

x = tf.placeholder('float')
y = tf.placeholder('float')

hidden_1_layer = {'f_fum':n_nodes_hl1,
                  'weight':tf.Variable(tf.random_normal([len(train_x[0]), n_nodes_hl1])),
                  'bias':tf.Variable(tf.random_normal([n_nodes_hl1]))}

hidden_2_layer = {'f_fum':n_nodes_hl2,
                  'weight':tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                  'bias':tf.Variable(tf.random_normal([n_nodes_hl2]))}

hidden_3_layer = {'f_fum':n_nodes_hl3,
                  'weight':tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                  'bias':tf.Variable(tf.random_normal([n_nodes_hl3]))}

output_layer = {'f_fum':None,
                'weight':tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                'bias':tf.Variable(tf.random_normal([n_classes])),}
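
A hedged sketch of the forward pass these layer dictionaries are conventionally fed into:

def neural_network_model(data):
    l1 = tf.nn.relu(tf.add(tf.matmul(data, hidden_1_layer['weight']),
                           hidden_1_layer['bias']))
    l2 = tf.nn.relu(tf.add(tf.matmul(l1, hidden_2_layer['weight']),
                           hidden_2_layer['bias']))
    l3 = tf.nn.relu(tf.add(tf.matmul(l2, hidden_3_layer['weight']),
                           hidden_3_layer['bias']))
    return tf.add(tf.matmul(l3, output_layer['weight']), output_layer['bias'])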

Example #25
def conv2d_train(x,
                 variational_params,
                 strides,
                 padding,
                 data_format="NHWC",
                 clip_alpha=None,
                 eps=common.EPSILON):
    R"""Training computation for a variational conv2d.

  In variational dropout we train a Bayesian neural network where we assume a
  fully-factorized Gaussian posterior and log uniform prior over the weights.

  During training, we need to sample weights from this distribution. Rather
  than sample weights for each sample in the input batch, we can calculate the
  parameters of the distribution over the pre-activations analytically (this
  step is called the local reparameterization trick). This function calculates
  the mean and standard deviation of the distribution over the pre-activations,
  and then draws a single sample for each element in the input batch and passes
  them as output.

  Args:
    x: NHWC tf.Tensor representing the input batch of features.
    variational_params: 2-tuple of Tensors, where the first tensor is the \theta
      values and the second contains the log of the \sigma^2 values.
    strides: The stride of the sliding window for each dimension of `x`.
      Identical to standard strides argument for tf.conv2d.
    padding: String. One of "SAME", or "VALID". Identical to standard padding
      argument for tf.conv2d.
    data_format: 'NHWC' or 'NCHW' ordering of 4-D input Tensor.
    clip_alpha: Int or None. If integer, we clip the log \alpha values to
      [-clip_alpha, clip_alpha]. If None, don't clip the values.
    eps: Small constant value to use in log and sqrt operations to avoid NaNs.

  Returns:
    Output Tensor of the conv2d operation.

  Raises:
    RuntimeError: If the variational_params argument is not a 2-tuple.
  """
    theta, log_sigma2 = _verify_variational_params(variational_params)

    if clip_alpha is not None:
        # Compute the log_alphas and then compute the
        # log_sigma2 again so that we can clip on the
        # log alpha magnitudes
        log_alpha = common.compute_log_alpha(log_sigma2, theta, eps,
                                             clip_alpha)
        log_sigma2 = common.compute_log_sigma2(log_alpha, theta, eps)

    # Compute the mean and standard deviation of the distribution over the
    # convolution outputs
    mu_activation = tf.nn.conv2d(x,
                                 theta,
                                 strides,
                                 padding,
                                 data_format=data_format)
    std_activation = tf.sqrt(
        tf.nn.conv2d(tf.square(x),
                     tf.exp(log_sigma2),
                     strides,
                     padding,
                     data_format=data_format) + eps)

    output_shape = tf.shape(std_activation)
    return mu_activation + std_activation * tf.random_normal(output_shape)
Example #26
  def testPool(self, pooling_method):
    batch = 2
    depth = 3
    height = 4
    width = 6
    channels = 3
    tf.random.set_random_seed(1234)
    inputs = tf.random_normal([batch, depth, height, width, channels])

    stride_d = 3
    stride_h = 2
    stride_w = 3

    graph = mtf.Graph()
    mesh = mtf.Mesh(graph, "my_mesh")
    batch_dim = mtf.Dimension("batch", batch)
    depth_dim = mtf.Dimension("depth", depth)
    height_dim = mtf.Dimension("height", height)
    width_dim = mtf.Dimension("width", width)
    channels_dim = mtf.Dimension("channels", channels)

    mtf_inputs = mtf.import_tf_tensor(
        mesh, inputs, shape=mtf.Shape(
            [batch_dim, depth_dim, height_dim, width_dim, channels_dim]))

    if pooling_method == "MAX_2D":
      mtf_outputs = mtf.layers.max_pool2d(
          mtf_inputs, ksize=(stride_h, stride_w))
      inputs = tf.reshape(inputs, [batch * depth, height, width, channels])
      expected_outputs = tf.keras.layers.MaxPooling2D(
          (stride_h, stride_w))(inputs)
      expected_outputs = tf.reshape(
          expected_outputs,
          [batch, depth, int(height / stride_h),
           int(width / stride_w), channels])

    elif pooling_method == "AVG_2D":
      mtf_outputs = mtf.layers.avg_pool2d(
          mtf_inputs, ksize=(stride_h, stride_w))
      inputs = tf.reshape(inputs, [batch * depth, height, width, channels])
      expected_outputs = tf.keras.layers.AveragePooling2D(
          (stride_h, stride_w))(inputs)
      expected_outputs = tf.reshape(
          expected_outputs,
          [batch, depth, int(height / stride_h),
           int(width / stride_w), channels])

    elif pooling_method == "MAX_3D":
      mtf_outputs = mtf.layers.max_pool3d(
          mtf_inputs, ksize=[stride_d, stride_h, stride_w])
      expected_outputs = tf.keras.layers.MaxPooling3D(
          [stride_d, stride_h, stride_w])(inputs)

    elif pooling_method == "AVG_3D":
      mtf_outputs = mtf.layers.avg_pool3d(
          mtf_inputs, ksize=[stride_d, stride_h, stride_w])
      expected_outputs = tf.keras.layers.AveragePooling3D(
          [stride_d, stride_h, stride_w])(inputs)

    mtf_gradient = mtf.gradients([mtf_outputs], [mtf_inputs])[0]

    mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
        shape=[], layout={}, devices=[""])
    lowering = mtf.Lowering(graph, {mesh: mesh_impl})
    actual_outputs = lowering.export_to_tf_tensor(mtf_outputs)
    actual_gradient = lowering.export_to_tf_tensor(mtf_gradient)

    tf_group = lowering.copy_masters_to_slices()
    init = tf.global_variables_initializer()
    self.evaluate(init)
    self.evaluate(tf_group)
    actual, expected = self.evaluate([actual_outputs, expected_outputs])
    self.assertAllClose(actual, expected)

    actual = self.evaluate(actual_gradient)
    if pooling_method == "MAX_2D":
      expected_non_zeros = batch * depth * height * width * channels / (
          stride_h * stride_w)
      self.assertEqual(np.count_nonzero(actual), expected_non_zeros)

    elif pooling_method == "AVG_2D":
      expected = np.ones((batch, depth, height, width, channels),
                         dtype=np.float32) / stride_h / stride_w
      self.assertAllClose(actual, expected)

    elif pooling_method == "MAX_3D":
      expected_non_zeros = batch * depth * height * width * channels / (
          stride_d * stride_h * stride_w)
      self.assertEqual(np.count_nonzero(actual), expected_non_zeros)

    elif pooling_method == "AVG_3D":
      expected = np.ones((batch, depth, height, width, channels),
                         dtype=np.float32) / stride_d / stride_h / stride_w
      self.assertAllClose(actual, expected)
Example #27
#import tensorflow as tf

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x_train = [1, 2, 3]
y_train = [1, 2, 3]

W = tf.Variable(tf.random_normal([1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

hypothesis = x_train * W + b
cost = tf.reduce_mean(tf.square(hypothesis - y_train))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(2001):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(cost), sess.run(W), sess.run(b))
Example #28
    def __call__(self,
                 input_state,
                 location_scale,
                 prev_locations=None,
                 is_training=False,
                 policy="learned",
                 sampling_stddev=1e-5):
        """Builds emission network.

    Args:
      input_state: 2-D Tensor of shape [batch, state dimensionality]
      location_scale: a scalar in [0., 1.]; locations are normalized to the
        range [-location_scale, location_scale].
      prev_locations: if not None, added to the currently proposed location
        (i.e. relative locations are used).
      is_training: (Boolean) to indicate training or inference modes.
      policy: (String) 'learned': uses learned policy, 'random': uses random
        policy, or 'center': uses center look policy.
      sampling_stddev: Sampling distribution standard deviation.

    Returns:
      locations: network output reflecting next location to look at
        (normalized to range [-location_scale, location_scale]).
        The image locations mapping to locs are as follows:
          (-1, -1): upper left corner.
          (-1, 1): upper right corner.
          (1, 1): lower right corner.
          (1, -1): lower left corner.
      endpoints: dictionary with activations at different layers.
    """
        reuse = bool(self.var_list)  # reuse variables once they exist

        batch_size = input_state.shape.as_list()[0]

        tf.logging.info("BUILD Emission Network")
        endpoints = {}
        net = input_state

        # Fully connected layers.
        with tf.variable_scope("emission_network", reuse=reuse):
            net, endpoints_ = model_utils.build_fc_layers(
                net,
                self.num_units_fc_layers,
                activation=self.activation,
                regularizer=self.regularizer)
        endpoints.update(endpoints_)

        # Tanh output layer.
        with tf.variable_scope("emission_network/output", reuse=reuse):
            output, _ = model_utils.build_fc_layers(
                net, [self.location_dims],
                activation=tf.nn.tanh,
                regularizer=self.regularizer)

        # Scale locations to the [-location_scale, location_scale] range.
        mean_locations = location_scale * output
        if prev_locations is not None:
            mean_locations = prev_locations + mean_locations

        if policy == "learned":
            endpoints["mean_locations"] = mean_locations
            if is_training:
                # During training, sample a random location.
                locations = mean_locations + tf.random_normal(
                    shape=(batch_size, self.location_dims),
                    stddev=sampling_stddev)
                # Ensures range [-location_scale, location_scale]
                locations = tf.clip_by_value(locations, -location_scale,
                                             location_scale)
                tf.logging.info("Sampling locations.")
                tf.logging.info(
                    "====================================================")
            else:
                # At inference, use the mean location.
                locations = mean_locations

            locations = tf.stop_gradient(locations)
        elif policy == "random":
            # Use random policy for location.
            locations = tf.random_uniform(shape=(batch_size,
                                                 self.location_dims),
                                          minval=-location_scale,
                                          maxval=location_scale)
            endpoints["mean_locations"] = mean_locations
        elif policy == "center":
            # Use center look policy.
            locations = tf.zeros(shape=(batch_size, self.location_dims))
            endpoints["mean_locations"] = mean_locations
        else:
            raise ValueError(
                "policy can be either 'learned', 'random', or 'center'")

        if not reuse:
            self.collect_variables()
        return locations, endpoints
Example #29
def main(argv):

    # Cluster description
    cluster = tf.train.ClusterSpec({
        "ps": ["172.17.0.2:9666"],
        "worker": ["172.17.0.3:9666"]
    })

    # Create the server for this job
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    else:
        work_device = "/job:worker/task:0/cpu:0"
        with tf.device(
                tf.train.replica_device_setter(worker_device=work_device,
                                               cluster=cluster)):

            # Global step counter
            global_step = tf.train.get_or_create_global_step()

            # Prepare the data
            # mnist=tensorflow.keras.datasets.mnist
            mnist = keras.datasets.mnist
            # mnist = input_data.read_data_sets("./data/mnist/", one_hot=True)

            # Placeholders for the input data
            with tf.variable_scope("data"):
                x = tf.placeholder(tf.float32, [None, 28 * 28], name='x')
                y_true = tf.placeholder(tf.float32, [None, 10], name='y_true')

            # Build a fully-connected network
            with tf.variable_scope("fc_model"):
                # Randomly initialize the weights and bias
                weight = tf.Variable(tf.random_normal([28 * 28, 10],
                                                      mean=0.0,
                                                      stddev=1.0),
                                     name="w")
                bias = tf.Variable(tf.constant(0.0, shape=[10]), name="b")
                # Predictions; tf.identity just names the node (the original
                # tf.Variable wrapper would stop gradients reaching weight/bias)
                y_predict = tf.matmul(x, weight) + bias
                y_predict = tf.identity(y_predict, name="predict")

            # Mean loss over all samples
            with tf.variable_scope("soft_loss"):
                loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(labels=y_true,
                                                            logits=y_predict))

            # Gradient descent
            with tf.variable_scope("optimizer"):
                train_op = tf.train.GradientDescentOptimizer(0.1).minimize(
                    loss, global_step=global_step, name="train_op")

            # Compute the accuracy
            with tf.variable_scope("acc"):
                equal_list = tf.equal(tf.argmax(y_true, 1),
                                      tf.argmax(y_predict, 1))
                accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32),
                                          name="accuracy")

        # Create a distributed session
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir="./temp/ckpt/test",
                master="grpc://172.17.0.3:9666",
                is_chief=(FLAGS.task_index == 0),
                config=tf.ConfigProto(log_device_placement=True),
                hooks=[tf.train.StopAtStepHook(last_step=100)]) as mon_sess:
            while not mon_sess.should_stop():
                # mnist_x, mnist_y = mnist.train.next_batch(4000)
                (mnist_x, mnist_y), (_, _) = mnist.load_data()
                mnist_x = mnist_x.reshape(mnist_x.shape[0], 784)
                mnist_y = keras.utils.to_categorical(mnist_y, 10)
                mon_sess.run(train_op, feed_dict={x: mnist_x, y_true: mnist_y})

                graph_def = tf.get_default_graph().as_graph_def()
                output_graph_def = graph_util.convert_variables_to_constants(
                    mon_sess,
                    graph_def,
                    output_node_names=["acc/Mean", "predict"])
                mf = tf.gfile.GFile("mymodel.pb", "wb")
                mf.write(output_graph_def.SerializeToString())
                print("训练第%d步, 准确率为%f" % (global_step.eval(session=mon_sess),
                                          mon_sess.run(accuracy,
                                                       feed_dict={
                                                           x: mnist_x,
                                                           y_true: mnist_y
                                                       })))
                if global_step.eval(session=mon_sess) == 99:
                    # graph_def = tf.get_default_graph().as_graph_def()
                    # output_graph_def = graph_util.convert_variables_to_constants(mon_sess,graph_def,[])
                    # with  tf.gfile.GFile("mymodel.pb","wb") as mf:
                    #     serialized_graph = output_graph_def.SerializeToString()
                    #     mf.write(serialized_graph)
                    # model_f.write(output_graph_def.SerializeToString())
                    save_path = "save_models/mymodel"
                    # train_op, mf, and graph_def can't be passed as the first parameter here

                    # keras.models.save_model(graph_def, save_path, save_format="tf")
                    break
Example #30
import tensorflow.compat.v1 as tf  # assumed import (the snippet begins mid-file)
tf.disable_v2_behavior()
import numpy as np

x_data = [[1, 2, 1], [1, 3, 2], [1, 3, 4], [1, 5, 5], [1, 7, 5], [1, 2, 5],
          [1, 6, 6], [1, 7, 7]]
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0],
          [1, 0, 0], [1, 0, 0]]

# Evaluate our model using this test dataset
x_test = [[2, 1, 1], [3, 1, 2], [3, 3, 4]]
y_test = [[0, 0, 1], [0, 0, 1], [0, 0, 1]]

X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])

W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))

# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)

# Cross entropy cost/loss
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Try to change learning_rate to small numbers
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
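
Training and then evaluating on the held-out test set, as a sketch continuing the snippet:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(201):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
    print("prediction:", sess.run(prediction, feed_dict={X: x_test}))
    print("accuracy:", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))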