def mean(mean, variance, std=False):
    """Expected value of ReLU, f(x) = max(x, 0), for a Gaussian input.

    The computation broadcasts, so several means may be combined with a
    single variance, several variances with a single mean, or several of
    both.

    Args:
        mean: Input mean of size (Batch, Size), or None to take the
            zero-mean shortcut.
        variance: Input variance of size (Batch, Size), or a scalar v
            standing for v * ones(Size).
        std: If true, `variance` is already a standard deviation and no
            square root is taken.

    Returns:
        The mean of the ReLU output, of size (Batch, Size).
    """
    sigma = variance if std else tf.sqrt(variance)
    # Contribution that remains when the input mean is zero.
    zero_mean_term = sigma / tf.sqrt(2.0 * math.pi)
    if mean is None:
        # Shortcut: neither the exponential nor the erf term is needed.
        return zero_mean_term

    scaled_mean = mean / (math.sqrt(2.0) * sigma)
    cdf_term = 0.5 * mean * (1.0 + tf.erf(scaled_mean))
    return zero_mean_term * tf.exp(-scaled_mean ** 2.0) + cdf_term
def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
    """Probability that the labelled latent function is the largest one.

    Integrates, by Gauss-Hermite quadrature over the labelled latent
    function, the product of the Gaussian CDFs of all other latent
    functions.

    Args:
        Y: Class labels; flattened to one label per row.
        mu: Latent means, one column per class.
        var: Latent variances, one column per class.
        gh_x: Gauss-Hermite abscissae.
        gh_w: Gauss-Hermite weights.

    Returns:
        A column tensor with one probability per row.
    """
    labels = tf.reshape(Y, (-1,))

    # Mean and variance of the latent function picked out by each label.
    selected_mask = tf.cast(tf.one_hot(labels, self.num_classes, 1.0, 0.0), float_type)
    selected_mean = tf.reduce_sum(selected_mask * mu, 1)
    selected_var = tf.reduce_sum(selected_mask * var, 1)

    # Gauss-Hermite grid centred on the selected latent function.
    grid_scale = tf.sqrt(tf.clip_by_value(2.0 * selected_var, 1e-10, np.inf))
    grid = tf.reshape(selected_mean, (-1, 1)) + gh_x * tf.reshape(grid_scale, (-1, 1))

    # Gaussian CDF of every latent function (selected one included) on the grid.
    latent_std = tf.sqrt(tf.clip_by_value(var, 1e-10, np.inf))
    standardized = (tf.expand_dims(grid, 1) - tf.expand_dims(mu, 2)) / tf.expand_dims(latent_std, 2)
    cdfs = 0.5 * (1.0 + tf.erf(standardized / np.sqrt(2.0)))
    # Squash into [1e-4, 1 - 1e-4] so the product never hits exactly 0 or 1.
    cdfs = cdfs * (1 - 2e-4) + 1e-4

    # Replace the selected function's own CDF by 1 so it drops out of the product.
    other_mask = tf.cast(tf.one_hot(labels, self.num_classes, 0.0, 1.0), float_type)
    cdfs = cdfs * tf.expand_dims(other_mask, 2) + tf.expand_dims(selected_mask, 2)

    # Product over latent functions, weighted sum over the quadrature grid.
    quad_weights = tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1))
    return tf.matmul(tf.reduce_prod(cdfs, reduction_indices=[1]), quad_weights)
def testLogNormalCDF(self):
    """LogNormal.cdf must agree with the closed-form erf expression."""
    loc = 1.5
    scale = 0.4
    points = np.array([1e-4, 1.0, 2.0], dtype=np.float32)

    actual = tfd.LogNormal(loc=loc, scale=scale).cdf(points)

    # Closed form: 0.5 + 0.5 * erf((log(x) - loc) / (scale * sqrt(2))).
    standardized = (np.log(points) - loc) / (scale * np.sqrt(2))
    expected = .5 + .5 * tf.erf(standardized)

    self.assertAllClose(self.evaluate(actual), self.evaluate(expected))
def _ndtr(x):
    """Core of the normal CDF: 0.5 * (1 + erf(x / sqrt(2))), computed piecewise.

    Near zero (|x| / sqrt(2) < sqrt(2) / 2) the value is built from erf; in
    either tail it is built from erfc of the magnitude instead.
    """
    half_sqrt_2 = tf.constant(
        0.5 * np.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
    scaled = x * half_sqrt_2
    magnitude = tf.abs(scaled)

    central = 1. + tf.erf(scaled)
    # Upper tail: 2 - erfc(|w|); lower tail: erfc(|w|).
    tails = tf.where(
        tf.greater(scaled, 0.),
        2. - tf.math.erfc(magnitude),
        tf.math.erfc(magnitude))
    doubled_cdf = tf.where(tf.less(magnitude, half_sqrt_2), central, tails)
    return 0.5 * doubled_cdf
def pt_conv_2d(input_tensor, filter_shape, input_channels, output_channels, padding, name,
               stochastic=True, with_bias=True, reuse=False):
    """2-D convolution with multiplicative Gaussian weight noise and one shared log_alpha.

    The layer outputs `conv(x, kernel)` plus, when `stochastic` is true,
    unit Gaussian noise scaled by
    `sqrt(conv(x^2, exp(log_alpha) * kernel^2) + 1e-16)`.

    Args:
        input_tensor: Input passed to `tf.nn.conv2d` (stride 1 everywhere).
        filter_shape: Pair (height, width) of the kernel.
        input_channels: Number of input channels.
        output_channels: Number of output channels.
        padding: Padding mode forwarded to `tf.nn.conv2d`.
        name: Variable scope name for the layer's variables.
        stochastic: Whether to add the sampled noise term.
        with_bias: Whether to create and add a per-channel bias.
        reuse: When false, the KL term is added to the 'kl_loss' collection
            and summaries are registered. It is NOT forwarded to
            `tf.variable_scope`.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name):
        kernel = tf.get_variable(
            'kernel',
            [filter_shape[0], filter_shape[1], input_channels, output_channels],
            initializer=tf.contrib.layers.xavier_initializer(seed=322),
            dtype=tf.float32, trainable=True)
        log_alpha = tf.get_variable(
            'log_alpha', [], initializer=tf.constant_initializer(-10.0),
            dtype=tf.float32, trainable=True)
        log_alpha = tf.clip_by_value(log_alpha, -20.0, 20.0)

        if not reuse:
            # Regulariser: sigmoid/log1p expression in log_alpha, scaled by
            # the number of kernel weights because log_alpha is shared.
            k1, k2, k3 = 0.63576, 1.8732, 1.48695
            C = -k1
            mdkl = k1 * tf.nn.sigmoid(k2 + k3 * log_alpha) - 0.5 * tf.log1p(tf.exp(-log_alpha)) + C
            kl = -tf.reduce_sum(mdkl) * tf.reduce_prod(tf.cast(kernel.get_shape(), tf.float32))
            tf.add_to_collection('kl_loss', kl)

        # Mean and standard deviation of the pre-activation.
        conved_mu = tf.nn.conv2d(input_tensor, kernel, [1, 1, 1, 1], padding=padding)
        conved_si = tf.sqrt(
            tf.nn.conv2d(input_tensor * input_tensor,
                         tf.exp(log_alpha) * kernel * kernel,
                         [1, 1, 1, 1], padding=padding) + 1e-16)

        output = conved_mu
        if stochastic:
            # Use the dynamic shape: the static shape cannot be converted to
            # a tensor when the batch dimension is unknown (None).
            output += tf.random_normal(tf.shape(conved_mu), mean=0, stddev=1) * conved_si
        if with_bias:
            biases = tf.get_variable('biases', output_channels, tf.float32,
                                     tf.constant_initializer(0.0))
            output = tf.nn.bias_add(output, biases)

        if not reuse:
            # 'error' sums the Gaussian probability that each pre-activation
            # (mean conved_mu [+ bias], std conved_si) is negative.
            if with_bias:
                error = 0.5*(1.0+tf.erf((-conved_mu-biases)/tf.sqrt(2.0)/conved_si))
            else:
                error = 0.5*(1.0+tf.erf((-conved_mu)/tf.sqrt(2.0)/conved_si))
            tf.summary.scalar('error', tf.reduce_sum(error))
            tf.summary.scalar('log_alpha', log_alpha)

        tf.add_to_collection('log_alpha', log_alpha)
    return output
def gelu(input_tensor):
    """Apply the exact (erf-based) Gaussian Error Linear Unit.

    GELU is a smoother version of the ReLU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
        input_tensor: float Tensor to activate.

    Returns:
        `input_tensor` with the GELU activation applied.
    """
    scaled = input_tensor / tf.sqrt(2.0)
    # Standard normal CDF evaluated at the input.
    normal_cdf = 0.5 * (1.0 + tf.erf(scaled))
    return input_tensor * normal_cdf
def gelu(x):
    """Gaussian Error Linear Unit, a smoother version of the ReLU.

    Uses the exact erf form, x * 0.5 * (1 + erf(x / sqrt(2))). The tanh
    approximation
    0.5*x*(1+tanh(sqrt(2/pi)*(x+0.044715*x^3))) is an alternative that is
    not used here.

    Original paper: https://arxiv.org/abs/1606.08415

    Args:
        x: float Tensor to perform activation.

    Returns:
        `x` with the GELU activation applied.
    """
    return x * (0.5 * (1.0 + tf.erf(x / tf.sqrt(2.0))))
def pt_dense(input_tensor, num_inputs, num_outputs, name, stochastic=True, with_bias=True, reuse=False):
    """Dense layer with multiplicative Gaussian weight noise and one shared log_alpha.

    The layer outputs `x @ W` plus, when `stochastic` is true, unit
    Gaussian noise scaled by `sqrt(x^2 @ (exp(log_alpha) * W^2) + 1e-16)`.

    Args:
        input_tensor: Input matrix multiplied by `W`.
        num_inputs: Number of rows of `W`.
        num_outputs: Number of columns of `W`.
        name: Variable scope name for the layer's variables.
        stochastic: Whether to add the sampled noise term.
        with_bias: Whether to create and add a bias.
        reuse: When false, the KL term is added to the 'kl_loss' collection
            and summaries are registered. It is NOT forwarded to
            `tf.variable_scope`.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name):
        # NOTE(review): the positional argument of truncated_normal_initializer
        # is `mean`, so this is mean=1e-2 with the default stddev. If
        # stddev=1e-2 was intended, change it deliberately; kept as-is here.
        W = tf.get_variable('W', [num_inputs, num_outputs],
                            initializer=tf.truncated_normal_initializer(1e-2),
                            dtype=tf.float32, trainable=True)
        log_alpha = tf.get_variable('log_alpha', [],
                                    initializer=tf.constant_initializer(-10.0),
                                    dtype=tf.float32, trainable=True)
        log_alpha = tf.clip_by_value(log_alpha, -20.0, 20.0)

        if not reuse:
            # Regulariser: sigmoid/log1p expression in log_alpha, scaled by
            # the number of weights because log_alpha is shared.
            k1, k2, k3 = 0.63576, 1.8732, 1.48695
            C = -k1
            mdkl = k1 * tf.nn.sigmoid(k2 + k3 * log_alpha) - 0.5 * tf.log1p(tf.exp(-log_alpha)) + C
            kl = -tf.reduce_sum(mdkl) * tf.reduce_prod(tf.cast(W.get_shape(), tf.float32))
            tf.add_to_collection('kl_loss', kl)

        # Mean and standard deviation of the pre-activation.
        mu = tf.matmul(input_tensor, W)
        si = tf.sqrt(tf.matmul(input_tensor * input_tensor, tf.exp(log_alpha) * W * W) + 1e-16)

        output = mu
        if stochastic:
            # Use the dynamic shape: the static shape cannot be converted to
            # a tensor when the batch dimension is unknown (None).
            output += tf.random_normal(tf.shape(mu), mean=0, stddev=1) * si
        if with_bias:
            biases = tf.get_variable('biases', num_outputs, tf.float32,
                                     tf.constant_initializer(0.0))
            output = tf.nn.bias_add(output, biases)

        if not reuse:
            # 'error' sums the Gaussian probability that each pre-activation
            # (mean mu [+ bias], std si) is negative.
            if with_bias:
                error = 0.5*(1.0+tf.erf((-mu-biases)/tf.sqrt(2.0)/si))
            else:
                error = 0.5*(1.0+tf.erf((-mu)/tf.sqrt(2.0)/si))
            tf.summary.scalar('error', tf.reduce_sum(error))
            tf.summary.scalar('log_alpha', log_alpha)

        tf.add_to_collection('log_alpha', log_alpha)
    return output
def gelu(input_tensor):
    """Gaussian Error Linear Unit (exact erf form).

    This is a smoother version of the RELU.
    Original paper: https://arxiv.org/abs/1606.08415

    The tanh approximation,
    x * 0.5 * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3))),
    is intentionally not used.

    Args:
        input_tensor: float Tensor to perform activation.

    Returns:
        `input_tensor` with the GELU activation applied.
    """
    erf_term = tf.erf(input_tensor / tf.sqrt(2.0))
    gate = 0.5 * (1.0 + erf_term)
    return input_tensor * gate
def normal_ccdf(x, mu, sigma2):
    """Complementary CDF of a normal distribution with mean mu and variance sigma2.

    Computes 1 - 0.5 * (1 + erf((x - mu) / sqrt(2 * sigma2))).

    A degenerate distribution (sigma2 <= 0) is not special-cased: the
    division by sqrt(2 * sigma2) is performed as-is, so callers must pass a
    positive variance if they need a finite, well-defined result.
    """
    standardized = (x - mu) / tf.sqrt(2. * sigma2)
    cdf = 0.5 * (1. + tf.erf(standardized))
    return 1. - cdf
def Wilcoxon_Signed_Rank_Test2D(x, y):
    """
    Conduct the Wilcoxon signed-rank test between each row of two tensors.
    Formula referred: https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test

    Ranks are assigned by sorting the absolute differences; tied values get
    consecutive (not averaged) ranks, and zero differences keep a rank but
    contribute nothing because their sign is 0.

    Args:
        x: 2d tensor (MxN). The number of second dimension should be same to y's second.
        y: 2d tensor (LxN). The number of second dimension should be same to x's second.

    Returns:
        z: 2d tensor (MxL). The z-score. If the sign of z-score is positive, the x is bigger than y.
        p: 2d tensor (MxL). The p-value based on the two-sided test.
    """
    # Pairwise row differences via broadcasting: [M, 1, N] - [1, L, N].
    subtract_XY = tf.expand_dims(x, axis=1) - tf.expand_dims(y, axis=0)  #[M, L, N]
    diff_Shape = tf.shape(subtract_XY)
    row_Count, col_Count, vector_Len = diff_Shape[0], diff_Shape[1], diff_Shape[2]
    vector_Size = tf.cast(vector_Len, tf.float32)

    sign_Subtract_XY = tf.sign(subtract_XY)
    abs_Subtract_XY = tf.abs(subtract_XY)

    index_Dimension1 = tf.tile(
        tf.reshape(tf.range(row_Count), [-1, 1, 1]),
        multiples=[1, col_Count, vector_Len]
        )   #[M, L, N]
    index_Dimension2 = tf.tile(
        tf.reshape(tf.range(col_Count), [1, -1, 1]),
        multiples=[row_Count, 1, vector_Len]
        )   #[M, L, N]
    # sorted=True is required: position r must hold the index of the element
    # with the (r + 1)-th smallest absolute difference. With sorted=False the
    # order of the returned indices is unspecified, so ranks would be wrong.
    index_Dimension3 = tf.nn.top_k(-abs_Subtract_XY, k=vector_Len, sorted=True).indices    #[M, L, N]

    rank_Map = tf.stack([index_Dimension1, index_Dimension2, index_Dimension3], axis=3) #[M, L, N, 3]
    # Signs reordered so that position r carries the sign of rank r + 1.
    mapped_Sign_X = tf.gather_nd(sign_Subtract_XY, indices=rank_Map)  #[M, L, N]

    ranks = tf.cast(tf.range(vector_Len), dtype=tf.float32) + 1   #[N], broadcasts over [M, L, N]
    wilcoxon_Value = tf.reduce_sum(mapped_Sign_X * ranks, axis=2)    #[M, L]

    # Normal approximation: Var(W) = N (N + 1) (2N + 1) / 6.
    z_Score = wilcoxon_Value / tf.sqrt(vector_Size * (vector_Size + 1) * (2 * vector_Size + 1) / 6)
    p_Value = 1 - tf.erf(tf.abs(z_Score) / tf.sqrt(2.0))

    z_Score = tf.identity(z_Score, name="wilcoxon_Signed_Rank_Test_Z_Score")
    p_Value = tf.identity(p_Value, name="wilcoxon_Signed_Rank_Test_P_Value")

    return z_Score, p_Value
def xi_mapped(s, d, s_len, d_len, Q, Nw, Ns, NFFT, fs, P, nconst, mu, sigma):
    '''
    Mapped a priori SNR training target.

    Inputs:
        s - clean waveform (dtype=tf.int32).
        d - noise waveform (dtype=tf.int32).
        s_len - clean waveform length without padding (samples).
        d_len - noise waveform length without padding (samples).
        Q - SNR level.
        Nw - window length (samples).
        Ns - window shift (samples).
        NFFT - DFT components.
        fs - sampling frequency (Hz); not used in this function.
        P - padded waveform length (samples).
        nconst - normalization constant.
        mu - mean of a priori SNR in dB.
        sigma - standard deviation of a priori SNR in dB.

    Outputs:
        x_STMS - padded noisy single-sided magnitude spectrum.
        xi_mapped - mapped a priori SNR.
        seq_len - length of each sequence without padding.
    '''
    # Mix and pad every (clean, noise) pair of the batch.
    (s, x, d) = tf.map_fn(
        lambda z: addnoisepad(z[0], z[1], z[2], z[3], z[4], P, nconst),
        (s, d, s_len, d_len, Q),
        dtype=(tf.float32, tf.float32, tf.float32))
    seq_len = nframes(s_len, Ns)  # length of each sequence.

    s_STMS = stms(s, Nw, Ns, NFFT)  # clean speech STMS.
    d_STMS = stms(d, Nw, Ns, NFFT)  # noise STMS.
    x_STMS = stms(x, Nw, Ns, NFFT)  # noisy speech STMS.

    # A priori SNR; both spectra are floored at 1e-12 before squaring.
    clean_power = tf.square(tf.maximum(s_STMS, 1e-12))
    noise_power = tf.square(tf.maximum(d_STMS, 1e-12))
    xi = tf.truediv(clean_power, noise_power)
    xi_dB = tf.multiply(10.0, log10(xi))

    # Gaussian CDF with parameters (mu, sigma) applied to the SNR in dB.
    standardized = tf.truediv(tf.subtract(xi_dB, mu),
                              tf.multiply(sigma, tf.sqrt(2.0)))
    target = tf.multiply(0.5, tf.add(1.0, tf.erf(standardized)))

    # Keep only the unpadded frames (converts to 2D).
    target = tf.boolean_mask(target, tf.sequence_mask(seq_len))
    return (x_STMS, target, seq_len)  # (input, target, sequence length).
def _normal_distribution_cdf(x, stddev):
    """CDF of a zero-mean normal distribution with standard deviation `stddev`.

    Input and output `Tensor`s have matching shapes.

    Args:
        x: a `Tensor` of evaluation points.
        stddev: a `Tensor` with the same shape as `x`.

    Returns:
        a `Tensor` with the same shape as `x`.
    """
    # The 1e-20 offset keeps the denominator away from an exact zero.
    denominator = math.sqrt(2) * stddev + 1e-20
    return 0.5 * (1.0 + tf.erf(x / denominator))
def gelu(inputs, scope='gelu', reuse=None):
    """Gaussian Error Linear Unit, a smoother version of the ReLU.

    Paper: https://arxiv.org/abs/1606.08415

    Args:
        - inputs: float Tensor
        - scope: name of the variable scope the ops are created in
        - reuse: reuse flag forwarded to `tf.variable_scope`

    Returns:
        `inputs` with the gelu activation applied.
    """
    with tf.variable_scope(scope, reuse=reuse):
        erf_term = tf.erf(inputs / tf.sqrt(2.0))
        gate = 0.5 * (1.0 + erf_term)
        return inputs * gate
def feat_extr(s, d, s_len, d_len, Q, Nw, Ns, NFFT, fs, P, nconst, mu, sigma):
    '''
    Extracts input features and targets from given clean speech and noise.

    Inputs:
        s - clean waveform (dtype=tf.int32).
        d - noise waveform (dtype=tf.int32).
        s_len - clean waveform length without padding (samples).
        d_len - noise waveform length without padding (samples).
        Q - SNR level.
        Nw - window length (samples).
        Ns - window shift (samples).
        NFFT - DFT components.
        fs - sampling frequency (Hz); not used in this function.
        P - padded waveform length (samples).
        nconst - normalization constant.
        mu - mean of a priori SNR in dB.
        sigma - standard deviation of a priori SNR in dB.

    Outputs:
        x_MS - padded noisy single-sided magnitude spectrum.
        phi_xi_dB - CDF of a priori SNR dB.
        seq_len - length of each sequence without padding.
    '''
    # Mix and pad every (clean, noise) pair of the batch.
    (s, x, d) = tf.map_fn(
        lambda z: feat.addnoisepad(z[0], z[1], z[2], z[3], z[4], P, nconst),
        (s, d, s_len, d_len, Q),
        dtype=(tf.float32, tf.float32, tf.float32))
    seq_len = feat.nframes(s_len, Ns)  # length of each sequence.

    s_MS = feat.stms(s, Nw, Ns, NFFT)  # clean speech magnitude spectrum.
    d_MS = feat.stms(d, Nw, Ns, NFFT)  # noise magnitude spectrum.
    x_MS = feat.stms(x, Nw, Ns, NFFT)  # noisy speech magnitude spectrum.

    # A priori SNR; 1e-12 is added to the noise power before dividing.
    noise_power = tf.add(tf.square(d_MS), 1e-12)
    xi = tf.div(tf.square(s_MS), noise_power)
    # Note: the 1e-12 here is added to log10(xi), i.e. after the logarithm.
    xi_dB = tf.multiply(10.0, tf.add(log10(xi), 1e-12))

    # Gaussian CDF with parameters (mu, sigma) applied to the SNR in dB.
    standardized = tf.div(tf.subtract(xi_dB, mu),
                          tf.multiply(sigma, tf.sqrt(2.0)))
    cdf = tf.multiply(0.5, tf.add(1.0, tf.erf(standardized)))

    # Keep only the unpadded frames (converts to 2D).
    cdf = tf.boolean_mask(cdf, tf.sequence_mask(seq_len))
    return (x_MS, cdf, seq_len)
def gelu(input_tensor):
    """Gaussian Error Linear Unit.

    This is a smoother version of the RELU.
    Original paper: https://arxiv.org/abs/1606.08415

    The tanh approximation would be
        cdf = 0.5 * (1.0 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))
        return x * cdf
    The implementation below instead uses erf, the error function, so the
    result is the exact Gaussian Error Linear Unit.

    Args:
        input_tensor: float Tensor to perform activation.

    Returns:
        `input_tensor` with the GELU activation applied.
    """
    return input_tensor * (0.5 * (1.0 + tf.erf(input_tensor / tf.sqrt(2.0))))
def Wilcoxon_Rank_Sum_Test2D(x, y):
    """
    Conduct the Wilcoxon rank-sum test (Mann–Whitney U test) between each row of two tensors.
    Formula referred:
        https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test
        http://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_nonparametric/BS704_Nonparametric4.html
        http://3months.tistory.com/128

    Ranks are assigned by sorting the pooled values; tied values get
    consecutive (not averaged) ranks.

    Args:
        x: 2d tensor (MxA).
        y: 2d tensor (LxB).

    Returns:
        z: 2d tensor (MxL). The z-score. If the sign of z-score is positive, the x's mean is bigger than y's.
        p: 2d tensor (MxL). The p-value based on the two-sided test.
    """
    x_Size = tf.cast(tf.shape(x)[1], tf.float32)
    y_Size = tf.cast(tf.shape(y)[1], tf.float32)

    tiled_X = tf.tile(tf.expand_dims(x, axis=1), multiples=[1, tf.shape(y)[0], 1])   #[M, L, A]
    tiled_Y = tf.tile(tf.expand_dims(y, axis=0), multiples=[tf.shape(x)[0], 1, 1])   #[M, L, B]
    concat_XY = tf.concat([tiled_X, tiled_Y], axis=2)   #[M, L, (A+B)]
    pooled_Len = tf.shape(concat_XY)[2]

    # sorted=True is required: position r must hold the pooled index of the
    # (r + 1)-th smallest value. With sorted=False the order of the returned
    # indices is unspecified, so the rank sums would be wrong.
    rank_Map = tf.cast(
        tf.nn.top_k(-concat_XY, k=pooled_Len, sorted=True).indices,
        dtype=tf.float32)   #[M, L, (A+B)]
    # 1 where the element at that rank came from y (pooled index >= A), else 0.
    y_Map = tf.clip_by_value(rank_Map - x_Size + 1, clip_value_min=0, clip_value_max=1)   #[M, L, (A+B)]

    ranks = tf.cast(tf.range(pooled_Len) + 1, dtype=tf.float32)   #[(A+B)], broadcasts
    sum_Rank_Y = tf.reduce_sum(y_Map * ranks, axis=2)    #[M, L]

    wilcoxon_Value = x_Size * y_Size + (y_Size * (y_Size + 1) / 2) - sum_Rank_Y
    mean_Wilconxon = x_Size * y_Size / 2    # Because W1 + W2 = n1 * n2.
    s = tf.sqrt(x_Size * y_Size * (x_Size + y_Size + 1) / 12)

    z_Score = (wilcoxon_Value - mean_Wilconxon) / s
    # The p-value is computed in float64 for finer resolution: float32 cannot
    # cover z-scores above about 5.6.
    p_Value = tf.cast(
        1 - tf.erf(tf.abs(tf.cast(z_Score, tf.float64)) / tf.sqrt(tf.cast(2.0, tf.float64))),
        tf.float32)

    z_Score = tf.identity(z_Score, name="wilcoxon_Rank_Sum_Test_Z_Score")
    p_Value = tf.identity(p_Value, name="wilcoxon_Rank_Sum_Test_P_Value")

    return z_Score, p_Value
def input_target_xi(s, d, s_len, d_len, SNR, N_w, N_s, NFFT, f_s, mu, sigma):
    '''
    Input features and target (mapped a priori SNR) for polar form
    acoustic-domain.

    Inputs:
        s - clean speech (dtype=tf.int32).
        d - noise (dtype=tf.int32).
        s_len - clean speech length without padding (samples).
        d_len - noise length without padding (samples).
        SNR - SNR level.
        N_w - time-domain window length (samples).
        N_s - time-domain window shift (samples).
        NFFT - number of acoustic-domain DFT components.
        f_s - sampling frequency (Hz); not used in this function.
        mu - sample mean.
        sigma - sample standard deviation.

    Outputs:
        x_MAG - noisy speech magnitude spectrum.
        xi_mapped - mapped a priori SNR (target).
        L - number of time-domain frames for each sequence.
    '''
    (x, s, d) = add_noise_batch(s, d, s_len, d_len, SNR)
    # Number of acoustic-domain frames for each sequence (uppercase eta).
    L = num_frames(s_len, N_s)

    x_MAG, _ = polar.analysis(x, N_w, N_s, NFFT)
    # Clean and noise magnitudes are restricted to the unpadded frames.
    s_MAG, _ = polar.analysis(s, N_w, N_s, NFFT)
    s_MAG = tf.boolean_mask(s_MAG, tf.sequence_mask(L))
    d_MAG, _ = polar.analysis(d, N_w, N_s, NFFT)
    d_MAG = tf.boolean_mask(d_MAG, tf.sequence_mask(L))

    # A priori SNR; both magnitudes are floored at 1e-12 before squaring.
    clean_power = tf.square(tf.maximum(s_MAG, 1e-12))
    noise_power = tf.square(tf.maximum(d_MAG, 1e-12))
    xi = tf.truediv(clean_power, noise_power)
    xi_dB = tf.multiply(10.0, log10(xi))

    # Gaussian CDF with parameters (mu, sigma) applied to the SNR in dB.
    standardized = tf.truediv(tf.subtract(xi_dB, mu),
                              tf.multiply(sigma, tf.sqrt(2.0)))
    xi_mapped = tf.multiply(0.5, tf.add(1.0, tf.erf(standardized)))
    return x_MAG, xi_mapped, L
def fully_variance_dense(input_tensor, num_inputs, num_outputs, mean_initializer, name, stochastic=True, reuse=False):
    """Dense layer with fixed (non-trainable) weight means and trainable per-weight variances.

    The layer outputs `x @ W` plus, when `stochastic` is true, unit
    Gaussian noise scaled by `sqrt(x^2 @ exp(log_sigma2) + 1e-16)`.

    Args:
        input_tensor: Input matrix multiplied by `W`.
        num_inputs: Number of rows of `W` and `log_sigma2`.
        num_outputs: Number of columns of `W` and `log_sigma2`.
        mean_initializer: Initializer for the non-trainable mean weights `W`.
        name: Variable scope name for the layer's variables.
        stochastic: Whether to add the sampled noise term.
        reuse: When false, the 'error' summary is registered. It is NOT
            forwarded to `tf.variable_scope`.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name):
        W = tf.get_variable('W', [num_inputs, num_outputs],
                            initializer=mean_initializer,
                            dtype=tf.float32, trainable=False)
        log_sigma2 = tf.get_variable('log_sigma2', [num_inputs, num_outputs],
                                     initializer=tf.constant_initializer(-3.0),
                                     dtype=tf.float32, trainable=True)

        # Mean and standard deviation of the pre-activation.
        mu = tf.matmul(input_tensor, W)
        si = tf.sqrt(tf.matmul(input_tensor * input_tensor, tf.exp(log_sigma2)) + 1e-16)

        output = mu
        if stochastic:
            # Use the dynamic shape: the static shape cannot be converted to
            # a tensor when the batch dimension is unknown (None).
            output += tf.random_normal(tf.shape(mu), mean=0, stddev=1) * si

        if not reuse:
            # 'error' sums the Gaussian probability that each pre-activation
            # (mean mu, std si) is negative.
            error = 0.5*(1.0+tf.erf((-mu)/tf.sqrt(2.0)/si))
            tf.summary.scalar('error', tf.reduce_sum(error))
    return output
def Batch_Correlation2D(x, y):
    """
    Compute, per batch element, the correlations between each pair of rows of two tensors.
    Main purpose is checking the correlations between the units of two layers.

    Both inputs are first multiplied by max|t| / (min|t| + 1e-8), where t is
    the concatenation of x and y along axis 1.

    Args:
        x: 3d tensor (BATCHxMxN). The number of first and third dimension should be same to y's first and third dimension.
        y: 3d tensor (BATCHxLxN). The number of first and third dimension should be same to x's first and third dimension.

    Returns:
        correlation_Tensor: A `Tensor` representing the correlation between the rows. Size is (BATCH x M x L)
        p_Value_Tensor: A `Tensor` representing the p-value of correlation. Size is (BATCH x M x L)
    """
    # Common rescaling of both inputs.
    abs_All = tf.abs(tf.concat([x, y], axis=1))
    t_Min = tf.reduce_min(abs_All) + 1e-8
    t_Max = tf.reduce_max(abs_All)
    x = x / t_Min * t_Max
    y = y / t_Min * t_Max

    # Centre every row.
    centered_X = x - tf.reduce_mean(x, axis=2, keepdims=True)  #[Batch, M, N]
    centered_Y = y - tf.reduce_mean(y, axis=2, keepdims=True)  #[Batch, L, N]

    # Sum of squared deviations per row.
    sq_Sum_X = tf.reduce_sum(tf.pow(centered_X, 2), axis=2, keepdims=True)  #[Batch, M, 1]
    sq_Sum_Y = tf.reduce_sum(tf.pow(centered_Y, 2), axis=2, keepdims=True)  #[Batch, L, 1]

    covariance = tf.matmul(centered_X, tf.transpose(centered_Y, perm=[0, 2, 1]))  #[Batch, M, L]
    normaliser = tf.sqrt(tf.matmul(sq_Sum_X, tf.transpose(sq_Sum_Y, perm=[0, 2, 1])))  #[Batch, M, L]
    correlation_Tensor = covariance / normaliser  #[Batch, M, L]

    # Two-sided p-value treating |r| * sqrt(N) as a standard normal statistic.
    sample_Count = tf.cast(tf.shape(x)[2], tf.float32)
    p_Value_Tensor = 1 - tf.erf(
        tf.abs(correlation_Tensor) * tf.sqrt(sample_Count) / tf.sqrt(2.0))  #[Batch, M, L]

    correlation_Tensor = tf.identity(correlation_Tensor, name="correlation")
    p_Value_Tensor = tf.identity(p_Value_Tensor, name="p_value")
    return (correlation_Tensor, p_Value_Tensor)
def huber(y_true, y_pred):
    """Variance-weighted Huber loss plus a log-normaliser penalty, summed to a scalar.

    `y_pred[..., 0]` is read as the predicted location and `y_pred[..., 1]`
    as the predicted scale; both are reshaped to [-1, 4, 10, 1]. The Huber
    threshold is `k * sigma`, where `k` comes from the enclosing module.
    """
    mu = tf.reshape(y_pred[..., 0], [-1, 4, 10, 1])
    sigma = tf.reshape(y_pred[..., 1], [-1, 4, 10, 1])
    inv_sigma_sq = 1. / tf.square(sigma)
    tau = k * sigma  # Huber threshold (not clipped).

    residual = y_true - mu
    abs_diff = tf.abs(residual)
    squared_diff = tf.square(residual)

    # Quadratic inside the threshold, linear outside, scaled by 1 / sigma^2.
    quadratic_part = 0.5 * squared_diff
    linear_part = tau * abs_diff - 0.5 * tau * tau
    huber_loss = inv_sigma_sq * tf.where(tf.less(abs_diff, tau), quadratic_part, linear_part)

    # Log of the sum of the Gaussian-core term and the exponential-tail term.
    core_term = sigma * np.sqrt(2. * np.pi) * tf.erf((tau / np.sqrt(2.)) / sigma)
    tail_term = (2. / tau) * tf.square(sigma) * tf.exp((-0.5 * tau * tau) * inv_sigma_sq)
    confidence_penalty = tf.log(core_term + tail_term)

    return tf.reduce_sum(tf.add(huber_loss, confidence_penalty))
def KL(p, q, hypers=None, global_step=1.0E99):
    """KL divergence from `p` to `q` for the supported distribution pairs.

    `p` must be a DiagonalGaussianVar; `q` may be a DiagonalGaussianVar, a
    DiagonalLaplaceVar, or an InverseGammaVar (delegated to `EBKL`). Any
    other combination prints 'unsupported KL' and returns None.

    Args:
        p: First distribution.
        q: Second distribution.
        hypers: Forwarded to `EBKL` for the inverse-gamma case.
        global_step: Forwarded to `EBKL` for the inverse-gamma case.

    Returns:
        A scalar tensor (sum over all dimensions), whatever `EBKL` returns,
        or None when the pair is unsupported.
    """
    p_is_gaussian = isinstance(p, DiagonalGaussianVar)

    if p_is_gaussian and isinstance(q, DiagonalGaussianVar):
        # Gaussian vs Gaussian: cross-entropy minus entropy, with a small
        # epsilon added to q's variance before it is used as a divisor.
        safe_qvar = q.var + bu.EPSILON
        entropy_term = 0.5 * (1 + bu.log2pi + tf.log(p.var))
        cross_entropy_term = 0.5 * (bu.log2pi + tf.log(safe_qvar) +
                                    (p.var + (p.mean - q.mean)**2) / safe_qvar)
        return tf.reduce_sum(cross_entropy_term - entropy_term)

    if p_is_gaussian and isinstance(q, DiagonalLaplaceVar):
        sigma = tf.sqrt(p.var)
        mu_ovr_sigma = p.mean / sigma
        # 2 * phi(mu/sigma) + (mu/sigma) * erf(mu / (sigma * sqrt(2))),
        # then scaled by sigma / b.
        abs_moment = 2 * bu.standard_gaussian(mu_ovr_sigma) + \
            mu_ovr_sigma * tf.erf(mu_ovr_sigma * bu.one_ovr_sqrt2)
        scaled = abs_moment * (sigma / q.b)
        total = scaled + (0.5 * tf.log(2 * q.b * q.b / (pi * p.var)) - 0.5)
        return tf.reduce_sum(total)

    if p_is_gaussian and isinstance(q, InverseGammaVar):
        return EBKL(p, q, hypers, global_step)

    print('unsupported KL')
def runlic(vx, vy, L, magnitude=True):
    """Run line integral convolution of a fixed random texture over a vector field.

    Args:
        vx: 2-D array with the first vector component.
        vy: 2-D array with the second vector component; same shape as `vx`.
        L: Length parameter forwarded to `line_integral_convolution`
            (also used for `smax = 0.8 * L`).
        magnitude: If true, the result is multiplied by
            erf(sqrt(vx^2 + vy^2)).

    Returns:
        The evaluated output texture.

    Note:
        Reseeds NumPy's global random generator with 13.
    """
    assert vx.shape == vy.shape
    N, M = vx.shape

    # Deterministic noise texture.
    np.random.seed(13)
    noise = np.random.rand(N, M)

    noise_ph, vx_ph, vy_ph = [tf.placeholder(tf.float64, [N, M]) for _ in range(3)]
    lic = line_integral_convolution(noise_ph, vx_ph, vy_ph, L, N, M, smax=0.8 * L)
    if magnitude:
        lic = lic * tf.erf(tf.sqrt(vx_ph**2 + vy_ph**2))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    feeds = {noise_ph: noise, vx_ph: vx, vy_ph: vy}
    with tf.Session(config=config):
        tex_out = lic.eval(feed_dict=feeds)
    return tex_out
def gdOffsetLikelihood(y_true,e2,sigma_e2,a1_e2,a2_e2,n_e2,gausConstraints={}):
    """
    Negative log-likelihood of the gaussian-double exponential+offset model.

    The density has a Gaussian core (centre e2, width sigma_e2), exponential
    tails beyond -a1_e2 and +a2_e2 (in reduced units), and a flat offset
    n_e2. It is normalised over [-pi, pi]. NaN or infinite results are
    replaced by 500. `gausConstraints` is accepted but not used.
    """
    # Reduced variables: the residual and the distances to -pi and +pi.
    t = (y_true - e2)/sigma_e2
    t1 = (math.pi + e2)/sigma_e2
    t2 = (math.pi - e2)/sigma_e2

    # Integrals of the lower (n1) and upper (n2) exponential tails.
    n1 = (sigma_e2/a1_e2)*K.exp(0.5*tf.pow(a1_e2,2))*(K.exp(-tf.pow(a1_e2,2)) - K.exp(-a1_e2*t1))
    n2 = (sigma_e2/a2_e2)*K.exp(0.5*tf.pow(a2_e2,2))*(K.exp(-tf.pow(a2_e2,2)) - K.exp(-a2_e2*t2))

    gauss_scale = sqrt(math.pi/2)*sigma_e2
    erf_upper_edge = tf.erf(t2/sqrt(2))
    erf_upper_tail = tf.erf(a2_e2/sqrt(2))
    erf_lower_edge = tf.erf(-t1/sqrt(2))
    erf_lower_tail = tf.erf(-a1_e2/sqrt(2))

    # Normalisation, by whether each tail starts inside the [-pi, pi] window.
    no_tails = tf.logical_and(tf.greater_equal(a1_e2, t1), tf.greater_equal(a2_e2, t2))
    lower_tail_only = tf.logical_and(tf.greater(t1, a1_e2), tf.greater_equal(a2_e2, t2))
    upper_tail_only = tf.logical_and(tf.greater_equal(a1_e2, t1), tf.greater(t2, a2_e2))

    norm_no_tails = gauss_scale*(erf_upper_edge - erf_lower_edge)
    norm_lower_tail = gauss_scale*(erf_upper_edge - erf_lower_tail) + n1
    norm_upper_tail = gauss_scale*(erf_upper_tail - erf_lower_edge) + n2
    norm_both_tails = gauss_scale*(erf_upper_tail - erf_lower_tail) + n1 + n2

    N = tf.where(no_tails,
                 norm_no_tails,
                 tf.where(lower_tail_only,
                          norm_lower_tail,
                          tf.where(upper_tail_only,
                                   norm_upper_tail,
                                   norm_both_tails)))

    # Unnormalised density: upper tail, Gaussian core, or lower tail.
    upper_tail_pdf = K.exp(0.5*tf.pow(a2_e2, 2) - a2_e2*t)
    core_pdf = K.exp(-0.5*tf.pow(t,2))
    lower_tail_pdf = K.exp(0.5*tf.pow(a1_e2, 2) + a1_e2*t)
    f = tf.where(tf.greater_equal(t, a2_e2),
                 upper_tail_pdf,
                 tf.where(tf.greater_equal(t, -a1_e2), core_pdf, lower_tail_pdf))

    N = tf.clip_by_value(N,1e-5,9e12)
    nll = -K.log(n_e2 + f*(1-2*math.pi*n_e2)/N)

    # Replace non-finite values by a large constant penalty.
    penalty = 500*tf.ones_like(nll)
    nll = tf.where(tf.is_nan(nll), penalty, nll)
    nll = tf.where(tf.is_inf(nll), 500*tf.ones_like(nll), nll)
    return nll
def psi(t, tt, s=None, tau=None):
    """Evaluate one of several decreasing weighting functions of `t`.

    Args:
        t: Input values; cast to float32.
        tt: Function type:
            'basic'   -> 1 - t
            'exp'     -> (1 + exp(-s/2)) / (1 + exp(s * (t - 1/2)))
            'ind'     -> (1 - t / tau) where t < tau, else 0
            'erf'     -> 1 - erf(t)
            'exp_ind' -> (1 + exp(-s/2)) / (1 + exp(s * (t - tau))) where
                         t < tau, else 0
        s: Steepness for the 'exp' variants; defaults to 10.
        tau: Cut-off for the 'ind' variants; defaults to 0.75.

    Returns:
        A float32 tensor, or None when `tt` is not one of the names above.
    """
    if tt == 'basic':
        t = tf.cast(t, dtype=tf.float32)
        return 1 - t
    elif tt == 'exp':
        s = 10 if s is None else s
        t = tf.cast(t, dtype=tf.float32)
        # Float literals keep the halves exact under Python 2 as well.
        c = (1 + tf.exp(-s / 2.0)) / (1 + tf.exp(s * (t - 0.5)))
        print(c.dtype)
        return c
    elif tt == 'ind':
        t = tf.cast(t, dtype=tf.float32)
        tau = tf.constant(0.75, dtype=tf.float32) if tau is None else tau
        # 1.0 where t < tau, else 0.0.
        below_tau = tf.cast(t < tau, tf.float32)
        return (1 - t / tau) * below_tau
    elif tt == 'erf':
        t = tf.cast(t, dtype=tf.float32)
        return 1 - tf.erf(t)
    elif tt == 'exp_ind':
        tau = tf.constant(0.75, dtype=tf.float32) if tau is None else tau
        s = 10 if s is None else s
        t = tf.cast(t, dtype=tf.float32)
        # The mask is built here: this branch used to reference `ones` and
        # `zeros`, which exist only in the 'ind' branch (NameError).
        below_tau = tf.cast(t < tau, tf.float32)
        c = (1 + tf.exp(-s / 2.0)) / (1 + tf.exp(s * (t - tau))) * below_tau
        print(c.dtype)
        return c
def fully_variance_conv_2d(input_tensor, filter_shape, input_channels, output_channels,
                           mean_initializer, padding, name, stochastic=True, reuse=False):
    """2-D convolution with fixed (non-trainable) kernel means and trainable per-weight variances.

    The layer outputs `conv(x, kernel)` plus, when `stochastic` is true,
    unit Gaussian noise scaled by
    `sqrt(conv(x^2, exp(log_sigma2)) + 1e-16)`.

    Args:
        input_tensor: Input passed to `tf.nn.conv2d` (stride 1 everywhere).
        filter_shape: Pair (height, width) of the kernel.
        input_channels: Number of input channels.
        output_channels: Number of output channels.
        mean_initializer: Initializer for the non-trainable mean kernel.
        padding: Padding mode forwarded to `tf.nn.conv2d`.
        name: Variable scope name for the layer's variables.
        stochastic: Whether to add the sampled noise term.
        reuse: When false, the 'error' summary is registered. It is NOT
            forwarded to `tf.variable_scope`.

    Returns:
        The layer output tensor.
    """
    kernel_shape = [filter_shape[0], filter_shape[1], input_channels, output_channels]
    with tf.variable_scope(name):
        kernel = tf.get_variable('kernel', kernel_shape,
                                 initializer=mean_initializer,
                                 dtype=tf.float32, trainable=False)
        log_sigma2 = tf.get_variable('log_sigma2', kernel_shape,
                                     initializer=tf.constant_initializer(-3.0),
                                     dtype=tf.float32, trainable=True)

        # Mean and standard deviation of the pre-activation.
        conved_mu = tf.nn.conv2d(input_tensor, kernel, [1, 1, 1, 1], padding=padding)
        conved_si = tf.sqrt(
            tf.nn.conv2d(input_tensor * input_tensor, tf.exp(log_sigma2),
                         [1, 1, 1, 1], padding=padding) + 1e-16)

        output = conved_mu
        if stochastic:
            # Use the dynamic shape: the static shape cannot be converted to
            # a tensor when the batch dimension is unknown (None).
            output += tf.random_normal(tf.shape(conved_mu), mean=0, stddev=1) * conved_si

        if not reuse:
            # 'error' sums the Gaussian probability that each pre-activation
            # (mean conved_mu, std conved_si) is negative.
            error = 0.5*(1.0+tf.erf((-conved_mu)/tf.sqrt(2.0)/conved_si))
            tf.summary.scalar('error', tf.reduce_sum(error))
    return output
def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
    """Gauss-Hermite estimate of the probability that the labelled latent is the maximum.

    Args:
        Y: Class labels; flattened to one label per row.
        mu: Latent means, one column per class.
        var: Latent variances, one column per class.
        gh_x: Gauss-Hermite abscissae.
        gh_w: Gauss-Hermite weights.

    Returns:
        A column tensor with one probability per row.
    """
    flat_labels = tf.reshape(Y, (-1,))

    def _one_hot(on_value, off_value):
        # One-hot encoding of the labels in the model's float type.
        return tf.cast(tf.one_hot(flat_labels, self.num_classes, on_value, off_value), float_type)

    def _safe_sqrt(values):
        # Square root with the argument floored at 1e-10.
        return tf.sqrt(tf.clip_by_value(values, 1e-10, np.inf))

    # Work out the mean and variance of the indicated latent function.
    oh_on = _one_hot(1., 0.)
    mu_selected = tf.reduce_sum(oh_on * mu, 1)
    var_selected = tf.reduce_sum(oh_on * var, 1)

    # Generate the Gauss-Hermite grid around the indicated latent function.
    centre = tf.reshape(mu_selected, (-1, 1))
    spread = tf.reshape(_safe_sqrt(2. * var_selected), (-1, 1))
    X = centre + gh_x * spread

    # CDF of each latent function's Gaussian at every grid point
    # (including the selected function).
    offsets = tf.expand_dims(X, 1) - tf.expand_dims(mu, 2)
    dist = offsets / tf.expand_dims(_safe_sqrt(var), 2)
    cdfs = 0.5 * (1.0 + tf.erf(dist / np.sqrt(2.0)))
    cdfs = cdfs * (1 - 2e-4) + 1e-4

    # Blank out the selected latent function: its factor becomes exactly 1.
    oh_off = _one_hot(0., 1.)
    cdfs = cdfs * tf.expand_dims(oh_off, 2) + tf.expand_dims(oh_on, 2)

    # Product over the latent functions, then the weighted sum over the grid.
    joint = tf.reduce_prod(cdfs, reduction_indices=[1])
    return tf.matmul(joint, tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1)))
def Correlation2D(x, y):
    """
    Compute the correlations between each pair of rows of two tensors.
    Main purpose is checking the correlations between the units of two layers.

    Args:
        x: 2d tensor (MxN). The number of second dimension should be same to y's second dimension.
        y: 2d tensor (LxN). The number of second dimension should be same to x's second dimension.

    Returns:
        correlation_Tensor: A `Tensor` representing the correlation between the rows. Size is (M x L)
        p_Value_Tensor: A `Tensor` representing the p-value of correlation. Size is (M x L)
    """
    # Centre every row.
    centered_X = x - tf.reduce_mean(x, axis=1, keepdims=True)  #[M, N]
    centered_Y = y - tf.reduce_mean(y, axis=1, keepdims=True)  #[L, N]

    # Sum of squared deviations per row.
    sq_Sum_X = tf.reduce_sum(tf.pow(centered_X, 2), axis=1, keepdims=True)  #[M, 1]
    sq_Sum_Y = tf.reduce_sum(tf.pow(centered_Y, 2), axis=1, keepdims=True)  #[L, 1]

    covariance = tf.matmul(centered_X, tf.transpose(centered_Y))  #[M, L]
    normaliser = tf.sqrt(tf.matmul(sq_Sum_X, tf.transpose(sq_Sum_Y)))  #[M, L]
    correlation_Tensor = covariance / normaliser  #[M, L]

    # Two-sided p-value treating |r| * sqrt(N) as a standard normal statistic.
    sample_Count = tf.cast(tf.shape(x)[1], tf.float32)
    p_Value_Tensor = 1 - tf.erf(
        tf.abs(correlation_Tensor) * tf.sqrt(sample_Count) / tf.sqrt(2.0))  #[M, L]

    correlation_Tensor = tf.identity(correlation_Tensor, name="correlation")
    p_Value_Tensor = tf.identity(p_Value_Tensor, name="p_value")
    return (correlation_Tensor, p_Value_Tensor)
def neural_network(X, W_0, W_1, b_0, b_1):
    """Forward pass of the one-hidden-layer network, flattened to a vector.

    The hidden activation is selected by `self.activation_fn` (`self` comes
    from the enclosing scope): 'relu', 'Lrelu', 'erf', 'tanh', 'sigmoid',
    'softplus' or 'rbf'.

    Args:
        X: Input matrix.
        W_0: Hidden-layer weights (or RBF centres when the activation is 'rbf').
        W_1: Output-layer weights.
        b_0: Hidden-layer bias; ignored for 'rbf'.
        b_1: Output bias; accepted but not applied.

    Returns:
        The network output reshaped to one dimension.

    Raises:
        ValueError: If `self.activation_fn` is not a supported name.
    """
    activation = self.activation_fn
    if activation == 'rbf':
        self.beta_2 = 1.0 / (2 * self.g_var)
        h = tf.exp(-self.beta_2 * tf.square(X - W_0))
    else:
        if activation not in ('relu', 'Lrelu', 'erf', 'tanh', 'sigmoid', 'softplus'):
            # Previously an unknown name fell through and failed later with
            # an unrelated UnboundLocalError on `h`.
            raise ValueError('activation not recognized: %r' % (activation,))

        pre_activation = tf.matmul(X, W_0) + b_0
        if activation == 'relu':
            h = tf.maximum(pre_activation, 0)
        elif activation == 'Lrelu':
            a = 0.2  # leaky-ReLU slope for negative inputs
            h = tf.maximum(pre_activation, a * pre_activation)
        elif activation == 'erf':
            h = tf.erf(pre_activation)
        elif activation == 'tanh':
            h = tf.tanh(pre_activation)
        elif activation == 'sigmoid':
            h = tf.sigmoid(pre_activation)
        else:  # 'softplus'
            # A larger c gives more ReLU-like, less 'soft' behaviour. The
            # value is stored on self and read back through a local: the
            # bare name `c` was never defined before.
            self.c = 2.
            c = self.c
            h = tf.divide(tf.log(tf.exp(tf.multiply(pre_activation, c)) + 1), c)

    h = tf.matmul(h, W_1)  # no output bias: b_1 is intentionally not added
    return tf.reshape(h, [-1])
def test_Erf(self):
    """Run the shared check on tf.erf applied to a random 4x3 input."""
    sample = self.random(4, 3)
    self.check(tf.erf(sample))
def Phi(x):
    """Standard normal CDF, computed as 0.5 + 0.5 * erf(x / sqrt(2))."""
    erf_term = tf.erf(x/np.sqrt(2))
    return 0.5 + 0.5*erf_term
def inv_probit(x, sigma=np.sqrt(2.0)):
    '''
    Inverse probit function: 0.5 * (1 + erf(x / sigma)).

    With the default sigma = sqrt(2) this is the standard normal CDF.

    NB: do not take the log of this function, as that underflows for large
    negative x.
    '''
    erf_term = tf.erf(x / sigma)
    return 0.5 * (1.0 + erf_term)
def _cdf(self, x):
    """CDF at `x`: erf(max(x, 0) / scale / sqrt(2)).

    Negative inputs are clamped to zero first, so they evaluate to erf(0).
    """
    non_negative_x = tf.nn.relu(x)
    standardized = non_negative_x / self.scale / np.sqrt(2.0)
    return tf.erf(standardized)
def __init__(self, X, label, valid_X, valid_label, input_node, output_node,hidden_layers_node,
    learning_rate=0.001, learning_rate_decay=1.0, activation='tanh',
    L2_reg=0.0, L1_reg=0.0, optimizer='sgd',
    dropout_keep_prob=1.0, feature_selection=False,
    seed=1, sigma=0.5, lam=0.005, standardize=False
    ):
    """
    L2DeepSurv Class Constructor.

    Builds the network in its own graph, creates a session on it and
    initialises all variables.

    Parameters:
        X: np.array, covariate variables (training).
        label: dict, like {'e': event, 't': time}, Observation and Time in survival analyze.
        valid_X: covariate variables of the validation set.
        valid_label: labels of the validation set, same layout as `label`.
        input_node: int, number of covariate variables.
        output_node: int, number of output.
        hidden_layers_node: list, hidden layers in network.
        learning_rate: float, learning rate.
        learning_rate_decay: float, decay of learning rate (used by 'sgd' only).
        activation: string, one of 'relu', 'selu', 'sigmoid', 'tanh'.
        L2_reg: float, coefficient of L2 regularizate item.
        L1_reg: float, coefficient of L1 regularizate item.
        optimizer: string, 'sgd' or 'adam'.
        dropout_keep_prob: float, keep probability passed to dropout.
        feature_selection: bool, whether the gate regulariser is added to the loss.
        seed: graph-level random seed.
        sigma: float, gate noise scale used in the gate regulariser.
        lam: float, weight of the gate regulariser.
        standardize: accepted but not used by the constructor.

    Raises:
        NotImplementedError: for an unknown `activation` or `optimizer`.
    """
    # Register gates hyperparameters
    self.lam = lam
    self.sigma = sigma

    # Prepare data
    self.train_data = {}
    self.train_data['X'], self.train_data['E'], \
        self.train_data['T'] = utils.prepare_data(X, label)
    self.train_data['ties'] = 'noties'

    self.valid_data = {}
    self.valid_data['X'], self.valid_data['E'], \
        self.valid_data['T'] = utils.prepare_data(valid_X, valid_label)
    self.valid_data['ties'] = 'noties'

    activations = {
        'relu': tf.nn.relu,
        'selu': tf.nn.selu,
        'sigmoid': tf.nn.sigmoid,
        'tanh': tf.nn.tanh,
    }

    # New Graph
    G = tf.Graph()
    with G.as_default():
        # Set the graph-level seed BEFORE any op is created. Seeding after
        # the graph is built (as was done previously) has no effect on the
        # initialisers and dropout ops that already exist.
        tf.set_random_seed(seed)

        # Data input
        x_input = tf.placeholder(tf.float32, [None, input_node], name='x-Input')
        y_ = tf.placeholder(tf.float32, [None, output_node], name='label-Input')
        train_gates = tf.placeholder(tf.float32, [1], name='train_gates')

        # hidden layers
        self.nnweights = []  # collect weights of network
        prev_node = input_node
        prev_x = x_input
        with tf.variable_scope('gates', reuse=tf.AUTO_REUSE):
            self.alpha = tf.get_variable(
                'alpha', [prev_node,],
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01))
            # The gates are always applied to the input, independent of
            # `feature_selection` (which only controls the regulariser).
            prev_x = self.feature_selector(prev_x, train_gates)

        for i, n_hidden in enumerate(hidden_layers_node):
            layer_name = 'layer' + str(i+1)
            with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE):
                weights = tf.get_variable(
                    'weights', [prev_node, n_hidden],
                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                self.nnweights.append(weights)

                biases = tf.get_variable(
                    'biases', [n_hidden],
                    initializer=tf.constant_initializer(0.0))

                # Dropout is applied to the pre-activation.
                layer_out = tf.nn.dropout(tf.matmul(prev_x, weights) + biases, dropout_keep_prob)

                if activation not in activations:
                    raise NotImplementedError('activation not recognized')
                layer_out = activations[activation](layer_out)

                prev_node = n_hidden
                prev_x = layer_out

        # output layers
        layer_name = 'layer_last'
        with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE):
            weights = tf.get_variable(
                'weights', [prev_node, output_node],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            self.nnweights.append(weights)

            biases = tf.get_variable(
                'biases', [output_node],
                initializer=tf.constant_initializer(0.0))

            layer_out = tf.matmul(prev_x, weights) + biases

        # Output of Network
        y = layer_out

        # Global step
        with tf.variable_scope('training_step', reuse=tf.AUTO_REUSE):
            global_step = tf.get_variable(
                "global_step", [], dtype=tf.int32,
                initializer=tf.constant_initializer(0), trainable=False)

        # Loss value
        ## L1 - L2 Regularization
        reg_item = tf.contrib.layers.l1_l2_regularizer(L1_reg, L2_reg)
        reg_term = tf.contrib.layers.apply_regularization(reg_item, self.nnweights)
        ## Negative log likelihood
        loss_fun = self._negative_log_likelihood(y_, y)
        loss = loss_fun + reg_term
        if feature_selection:
            ## Gate regulariser: 0.5 - 0.5 * erf((-0.5 - alpha) / (sigma * sqrt(2))).
            ## The literal -0.5 avoids Python 2 integer division of -1/2.
            reg = 0.5 - 0.5*tf.erf((-0.5 - self.alpha)/(self.sigma*np.sqrt(2)))
            reg_gates = tf.reduce_mean(reg) * self.lam
            loss = loss_fun + reg_term + reg_gates

        # Optimizer
        if optimizer == 'sgd':
            lr = tf.train.exponential_decay(
                learning_rate,
                global_step,
                1,
                learning_rate_decay
            )
            train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss, global_step=global_step)
        elif optimizer == 'adam':
            # Previously this branch silently built plain gradient descent.
            train_step = tf.train.AdamOptimizer(learning_rate).\
                minimize(loss, global_step=global_step)
        else:
            raise NotImplementedError('optimizer not recognized')

        # init op (created after the optimizer so its slot variables are included)
        init_op = tf.global_variables_initializer()

        # Create a saver
        self.saver = tf.train.Saver()

    # Save into class members
    self.X = x_input
    self.y_ = y_
    self.y = y
    self.train_gates = train_gates
    self.global_step = global_step
    self.loss = loss
    self.train_step = train_step
    self.configuration = {
        'input_node': input_node,
        'hidden_layers_node': hidden_layers_node,
        'output_node': output_node,
        'learning_rate': learning_rate,
        'learning_rate_decay': learning_rate_decay,
        'activation': activation,
        'L1_reg': L1_reg,
        'L2_reg': L2_reg,
        'optimizer': optimizer,
        'dropout': dropout_keep_prob
    }

    # create new Session for the DeepSurv Class
    self.sess = tf.Session(graph=G)

    # Initialize all global variables
    self.sess.run(init_op)
def gelu(x):
    """Exact GELU activation: 0.5 * x * (1 + erf(x / sqrt(2)))."""
    half_x = 0.5 * x
    erf_term = tf.erf(x / tf.sqrt(2.0))
    return half_x * (1.0 + erf_term)
def inv_probit(x):
    """Standard normal CDF of `x`, squashed into [jitter, 1 - jitter]."""
    jitter = 1e-3  # ensures output is strictly between 0 and 1
    normal_cdf = 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0)))
    return normal_cdf * (1 - 2 * jitter) + jitter
def probit(x):
    """Standard normal CDF of `x`: 0.5 * (1 + erf(x / sqrt(2)))."""
    erf_term = tf.erf(x / tf.sqrt(2.0))
    return 0.5 * (1.0 + erf_term)
def gelu(x):
    """Exact GELU activation: `x` times the standard normal CDF of `x`."""
    return x * (0.5 * (1.0 + tf.erf(x / tf.sqrt(2.0))))
def probit(x):
    """Standard normal CDF of `x`, rescaled into [1e-3, 1 - 1e-3]."""
    normal_cdf = 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0)))
    # Affine squash keeps the result strictly inside (0, 1).
    return normal_cdf * (1 - 2e-3) + 1e-3
def probit(x):
    """Jittered standard normal CDF: Phi(x) * (1 - 2e-3) + 1e-3."""
    scaled = x/np.sqrt(2.0)
    unit_cdf = 0.5*(1.0+tf.erf(scaled))
    return unit_cdf * (1-2e-3) + 1e-3