def nac_complex_single_layer(x_in, out_units, epsilon=0.000001):
    '''
    :param x_in: input feature vector
    :param out_units: number of output units of the cell
    :param epsilon: small value to avoid log(0); unused in this asinh-based variant
    :return: output tensor and the associated weight matrix
    '''
    in_shape = x_in.shape[1]

    W_hat = tf.get_variable(shape=[in_shape, out_units],
                            initializer=tf.initializers.random_uniform(minval=-2, maxval=2),
                            trainable=True, name="W_hat2")
    M_hat = tf.get_variable(shape=[in_shape, out_units],
                            initializer=tf.initializers.random_uniform(minval=-2, maxval=2),
                            trainable=True, name="M_hat2")

    W = tf.nn.tanh(W_hat) * tf.nn.sigmoid(M_hat)

    # Express the input features in asinh space (a log-like transform) to learn
    # complex (multiplicative) functions
    x_modified = tf.asinh(x_in)

    m = tf.sinh(tf.matmul(x_modified, W))

    return m, W

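# A minimal usage sketch for the cell above, assuming TF 1.x graph mode; the placeholder
# shape, batch values, and session loop are illustrative and not part of the original.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 2])
y, W = nac_complex_single_layer(x, out_units=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # With random weights the output is arbitrary; training would push W toward a
    # {-1, 0, 1} pattern so that sinh(asinh(a)*w1 + asinh(b)*w2) approximates a*b.
    print(sess.run(y, feed_dict={x: np.array([[2.0, 3.0]])}))
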
def arcsinh(x, alpha=1):
    """
    arcsinh

    See Also: logtanh
    """
    return tf.asinh(x * alpha) / alpha

def call_with_critical_point_scanner(f, *args):
    """Calls f(scanner, *args) in TensorFlow session-context.

    Here, `scanner` will be a function with signature
    scanner(seed: int, scale: float) -> (potential, stationarity, pos_vector).

    The function `scanner` can only perform a scan when called from within the
    TF session-context that is set up by this function.
    """
    graph = tf.Graph()
    with graph.as_default():
        t_input = tf.placeholder(tf.float64, shape=[70])
        t_v70 = tf.Variable(initial_value=numpy.zeros([70]),
                            trainable=True, dtype=tf.float64)
        op_assign_input = tf.assign(t_v70, t_input)
        d = tf_so8_sugra_potential(t_v70)
        t_potential = d['potential']
        t_stationarity = tf_so8_sugra_stationarity(d['a1'], d['a2'])
        opt = contrib_opt.ScipyOptimizerInterface(tf.asinh(t_stationarity),
                                                  options=dict(maxiter=500))
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer()])

            def scanner(seed, scale):
                rng = numpy.random.RandomState(seed)
                v70 = rng.normal(scale=scale, size=[70])
                sess.run([op_assign_input], feed_dict={t_input: v70})
                opt.minimize(session=sess)
                n_ret = sess.run([t_potential, t_stationarity, t_v70])
                return n_ret

            return f(scanner, *args)

def __call__(self, input, reuse=False, is_training=False):
    with tf.variable_scope(self.name):
        # setup layer
        x = input
        input_size = input.shape[1].value

        g = tf.get_variable("_w_g_", [input_size, self.output_size],
                            dtype=tf.float32,
                            initializer=tf.truncated_normal_initializer(stddev=.01),
                            trainable=is_training)
        wt = tf.get_variable("_w_wt_", [input_size, self.output_size],
                             dtype=tf.float32,
                             initializer=tf.truncated_normal_initializer(stddev=.01),
                             trainable=is_training)
        mt = tf.get_variable("_w_mt_", [input_size, self.output_size],
                             dtype=tf.float32,
                             initializer=tf.truncated_normal_initializer(stddev=.01),
                             trainable=is_training)

        with tf.variable_scope('nac_w'):
            w = tf.multiply(tf.tanh(wt), tf.sigmoid(mt))
        with tf.variable_scope('simple_nac'):
            a = tf.matmul(x, w)
        with tf.variable_scope('complex_nac'):
            # m = tf.exp(self._mult_div_nac(tf.log(tf.abs(x) + 1e-10)))
            m = tf.sinh(tf.matmul(tf.asinh(x), w))
        with tf.variable_scope('math_gate'):
            gc = tf.sigmoid(tf.matmul(x, g))
        with tf.variable_scope('result'):
            x = (gc * a) + ((1 - gc) * m)

        # activation
        if self.act is not None:
            x = self.act(x)

        # setup dropout
        if self.dropout > 0 and is_training:
            x = tf.layers.dropout(inputs=x, rate=self.dropout)

        if not reuse:
            self.layer = x

        print(x)
        return x

def _forward_log_det_jacobian(self, x):
    # y = sinh((arcsinh(x) + skewness) * tailweight)
    # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
    # dy/dx
    # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
    # This is computed inside the log to avoid catastrophic cancellations
    # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
    return (tf.log(
        tf.cosh((tf.asinh(x) + self.skewness) * self.tailweight)
        # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
        # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
        / _sqrtx2p1(x)) + tf.log(self.tailweight))

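# `_sqrtx2p1` is referenced but not defined in this snippet. A rough sketch of what a
# helper like it presumably computes -- a numerically careful sqrt(x**2 + 1) that avoids
# overflowing x**2 for very large |x| -- is given below. The name, threshold, and body are
# assumptions for illustration, not the library's actual implementation.
def _sqrtx2p1_sketch(x):
    return tf.where(tf.abs(x) < 1e10,
                    tf.sqrt(x**2. + 1.),
                    # for huge |x|, sqrt(x**2 + 1) ~= |x| * sqrt(1 + x**-2)
                    tf.abs(x) * tf.sqrt(1. + x**-2.))
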
def __call__(self, input):
    """
    Performs forward propagation for the NAC cell

    :param input: a tensorflow input tensor
    :return: the outputs of the forward propagation
    """
    g = tf.sigmoid(tf.matmul(self._g, input))
    a = self._add_sub_nac(input)
    m = tf.sinh(self._mult_div_nac(tf.asinh(input)))
    y = tf.multiply(g, a) + tf.multiply(1 - g, m)
    return y

def _inverse_log_det_jacobian(self, y):
    # x = sinh(arcsinh(y) / tailweight - skewness)
    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
    # dx/dy
    # = cosh(arcsinh(y) / tailweight - skewness)
    #   / (tailweight * sqrt(y**2 + 1))
    # This is computed inside the log to avoid catastrophic cancellations
    # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(y**2 + 1).
    return (tf.log(
        tf.cosh(tf.asinh(y) / self.tailweight - self.skewness)
        # TODO(srvasude): Consider using cosh(arcsinh(y)) in cases
        # where (arcsinh(y) / tailweight) - skewness ~= arcsinh(y).
        / _sqrtx2p1(y)) - tf.log(self.tailweight))

def get_scanner(output_path, maxiter=1000, stationarity_threshold=1e-7):
    """Obtains a basic TensorFlow-based scanner for extremal points."""
    graph = tf.Graph()
    with graph.as_default():
        tf_scalar_evaluator = get_tf_scalar_evaluator()
        t_input = tf.compat.v1.placeholder(tf.float64, shape=[70])
        t_v70 = tf.Variable(initial_value=numpy.zeros([70]),
                            trainable=True, dtype=tf.float64)
        op_assign_input = tf.compat.v1.assign(t_v70, t_input)
        sinfo = tf_scalar_evaluator(tf.cast(t_v70, tf.complex128))
        t_potential = sinfo.potential
        t_stationarity = sinfo.stationarity
        op_opt = contrib_opt.ScipyOptimizerInterface(
            tf.asinh(t_stationarity), options={'maxiter': maxiter})

    def scanner(seed, scale=0.1, num_iterations=1):
        results = collections.defaultdict(list)
        rng = numpy.random.RandomState(seed)
        with graph.as_default():
            with tf.compat.v1.Session() as sess:
                sess.run([tf.compat.v1.global_variables_initializer()])
                for n in range(num_iterations):
                    v70 = rng.normal(scale=scale, size=[70])
                    sess.run([op_assign_input], feed_dict={t_input: v70})
                    op_opt.minimize(sess)
                    n_pot, n_stat, n_v70 = sess.run(
                        [t_potential, t_stationarity, t_v70])
                    if n_stat <= stationarity_threshold:
                        results[S_id(n_pot)].append(
                            (n, n_pot, n_stat, list(n_v70)))
                    # Overwrite output at every iteration.
                    if output_path is not None:
                        tmp_out = output_path + '.tmp'
                        with open(tmp_out, 'w') as h:
                            h.write('n=%4d: p=%.12g s=%.12g\n' % (n, n_pot, n_stat))
                            h.write(pprint.pformat(dict(results)))
                        os.rename(tmp_out, output_path)
        return dict(results)

    return scanner

def transform_input_tf(flux, input_mode, distmod, is_hsc):
    base = 27.0 if is_hsc else 27.5
    tmp = flux
    if 'scaled' in input_mode:
        m = tf.maximum(tf.reduce_max(tmp), 1.0)
        tmp = tmp / m
    if 'magnitude' in input_mode:
        v = tf.asinh(tmp * 0.5)
        if 'absolute' in input_mode:
            # c = tf.constant(2.5 * np.log10(np.e), dtype=tf.float32)
            # tmp = 27.5 - v * c - distmod
            tmp = base - 2.5 * v / tf.math.log(10.0) - distmod
        else:
            tmp = base - 2.5 * v / tf.math.log(10.0)
    return tmp

def hyp_mlr(incoming, before_mlr_dim, num_classes, radius=1.0,
            reuse=tf.AUTO_REUSE, scope='HyperbolicMLR', mlr_geom='hyp'):
    """
    Multi-logistic regression in hyperbolic space.

    :param incoming: incoming tensor with shape [batch_size x before_mlr_dim]
    :param before_mlr_dim: last dimension of the incoming tensor
    :param num_classes: number of output classes
    :param radius: radius of the Poincaré ball
    :param scope: scope for the operation
    :param mlr_geom: geometry of the MLR, either 'eucl' or 'hyp'
    :return: logits tensor with shape [batch_size x num_classes]
    """
    with tf.variable_scope(scope, reuse=reuse):
        A_mlr = []
        P_mlr = []
        logits_list = []
        for cl in range(num_classes):
            A_mlr.append(tf.get_variable('A_mlr' + str(cl), dtype=tf.float32,
                                         shape=[1, before_mlr_dim],
                                         initializer=tf.contrib.layers.xavier_initializer()))
            P_mlr.append(tf.get_variable('P_mlr' + str(cl), dtype=tf.float32,
                                         shape=[1, before_mlr_dim],
                                         initializer=tf.constant_initializer(0.0)))
            if mlr_geom == 'eucl':
                logits_list.append(
                    tf.reshape(hyp_ops.tf_dot(-P_mlr[cl] + incoming, A_mlr[cl]), [-1]))
            elif mlr_geom == 'hyp':
                minus_p_plus_x = hyp_ops.tf_mob_add(-P_mlr[cl], incoming, radius)
                norm_a = hyp_ops.tf_norm(A_mlr[cl])
                lambda_px = hyp_ops.tf_lambda_x(minus_p_plus_x, radius)
                px_dot_a = hyp_ops.tf_dot(minus_p_plus_x, tf.nn.l2_normalize(A_mlr[cl]))
                logit = 2. / np.sqrt(radius) * norm_a * tf.asinh(
                    np.sqrt(radius) * px_dot_a * lambda_px)
                logits_list.append(tf.reshape(logit, [-1]))
        logits = tf.stack(logits_list, axis=1)
    return logits

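# For readability: the per-class logit assembled in the 'hyp' branch above matches the
# closed-form hyperbolic MLR expression, where `radius` plays the role of the curvature
# parameter c, (-p) (+) x denotes the Mobius addition from hyp_ops.tf_mob_add, and
# lambda_{.} is the conformal factor from hyp_ops.tf_lambda_x:
#
#   logit_k(x) = (2 / sqrt(c)) * ||a_k||
#                * arcsinh( sqrt(c) * lambda_{(-p_k) (+) x} * < (-p_k) (+) x , a_k / ||a_k|| > )
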
def init_vars(self):
    # All calcs done in self.session
    self.session = (tf.get_default_session()
                    if tf.get_default_session() else tf.InteractiveSession())
    # All dynamical equations kept in assignments
    self.assignments_forces = tuple()
    self.assignments_points = tuple()
    self.dt0 = 1e-5  # tf.Variable(1e-5, dtype=tf.float32)
    self.dt = tf.Variable(1e-5, dtype=tf.float32)
    self.dt_ = tf.placeholder(tf.float32)

    # initialize variables
    self.it_num = 0
    self.t = 0
    self.tv = []  # to save volume evolution

    self.th_mean = mean(self.params['links']['thickness'] * self.link_weights)
    self.nrad_mean = mean(self.params['nodes']['radius'])
    self.r_min = float32(min(self.nrad_mean, self.th_mean) / 10.)
    self.f_mild = lambda x: self.r_min * tf.asinh(x / self.r_min)

    # for assignments
    self.asg = {}

def __init__(self, pts=[], edg=[], fixed=False, JSON=None, keep_paths=False,
             PAIRS=PAIRS, POW=POW, POWn=POWn,
             POW_SN=POW_SN,  # power extra factors of r_i+r_j in node repulsion
             **kw):
    """
    pts: node positions
    edg: edge list
    fixed: if nodes should remain fixed
    JSON: if network should be loaded from a json file (specify file paths or file object)
    keep_paths: whether to keep original link trajectories inside JSON file
    kw:
        links = {'k': <spring constant>,
                 'thickness': <# or list of thicknesses>,
                 'amplitude': <of repulsive force>,
                 'ce': <cooling exponent for sim. annealing>,
                 'Temp0': <initial temperature as a fraction of thickness>,
                 'segs': <# of segments on each link>}
        nodes = {'amplitude': <of repulsive force>,
                 'radius': <range of repulsive Gaussian force>}

    net.points contains all link points. The link-link interaction matrix should now be
    part of the net.links object, not the individual links. This allows massive
    vectorization of all interactions.

    net.links: Now we have a single net_links object that contains info of all links.
    All that separates them is the net_links.idx dict which indexes which points in
    net_links.points belong to which link. net_links also contains all methods for
    interactions between links.
    """
    tt.tic()
    # All calcs done in self.session
    self.session = tf.InteractiveSession()
    # All dynamical equations kept in assignments
    self.assignments = tuple()
    self.params = {
        'links': {
            'k': 1e1,
            'amplitude': 5e2,
            'thickness': .1,
            'Temp0': .5,
            'ce': 1000,
            'segs': 5,
            'weighted': 0
        },
        'nodes': {
            'amplitude': 5e2,
            'radius': 1.,
            'weighted': 0
        },
    }
    self.gnam = 'E-ELF-sim'
    self.dt0 = 1e-5  # tf.Variable(1e-5, dtype=tf.float32)
    self.dt = tf.Variable(1e-5, dtype=tf.float32)
    self.dt_ = tf.placeholder(tf.float32)

    if JSON:
        self.get_JSON(JSON, kw)

    self.it_num = 0
    kwl = (kw['links'] if 'links' in kw else {})
    kwn = (kw['nodes'] if 'nodes' in kw else {})
    self.params['links'].update(kwl)
    self.params['nodes'].update(kwn)
    self.fixed = fixed
    self.keep_paths = keep_paths
    self.PAIRS = PAIRS
    self.params['POW'] = self.POW = POW
    self.params['POWn'] = self.POWn = POWn
    self.params['POW_SN'] = self.POW_SN = POW_SN

    if not JSON:
        self.pts = array(pts)  # tf.Variable(pts, dtype=tf.float32)
        self.elist = array(edg, dtype=int32)[:, :2]  # tf.Variable(edg, dtype=tf.float32)
        self.link_weights = (array(edg)[:, 2]
                             if self.params['links']['weighted']
                             else array([1] * len(self.elist)))
    if 'labels' in self.params['nodes']:
        self.make_link_labels()

    self.it_num = 0
    self.t = 0
    self.tv = []  # to save volume evolution
    self.curr_keys = []

    # initialize variables
    self.next_binning = 0  # it_num of next binning
    self.bin_size = 10  # initial default factor
    #self.links.bin_rand(self.bin_size)
    #self.th_mean = mean(self.links.thickness)  # mean(self.params['links']['thickness'])
    self.th_mean = mean(self.params['links']['thickness'] * self.link_weights)
    self.nrad_mean = mean(self.params['nodes']['radius'])
    self.r_min = float32(min(self.nrad_mean, self.th_mean) / 10.)
    self.f_mild = lambda x: self.r_min * tf.asinh(x / self.r_min)
    #self._init(**kw)

    # for assignments
    self.asg = {}

    tt.toc()
    print "Making links...",
    self.net_links(net=self, **self.params['links'])  # (**kwl)
    tt.toc()
    print "Making nodes...",
    self.net_nodes(net=self, **self.params['nodes'])  # (**kwn)
    # we'll have variable thicknesses
    #self.gnam += '-th%.3g-r%.3g' % (self.params['links']['thickness'], self.params['nodes']['radius'])
    tt.toc()
    print "initializing global variables...",
    init = tf.global_variables_initializer()
    self.session.run(init)
    tt.toc()
    print "Initial binning..."
    self.rebin()
    tt.toc()
    print "setup: dt...",
    self.setup_dt()
    tt.toc()
    print "setup: volume...",
    self.vol = tf.reduce_sum(vec_len(self.links.dp))
    #self.vol = tf.reduce_sum([self.links.lens(l) for l in self.links.idx])
    tt.toc()
    print "setup: dynamics...",
    # Define the comp group and iteration steps
    self.step = tf.group(*self.assignments)
    tt.toc()
    print "setup: dynamics 2...",
    #self.dyn = ['dt'] + (['N'] if not self.fixed else []) + ['NL', 'LF_Int']
    self.dyn = (['N'] if not self.fixed else []) + ['NL', 'LF_Int']  # + ['dt']
    self.step2 = tf.group(*[self.asg[k][0] for k in self.dyn])
    self.step3 = tf.group(*self.asg['dt'])
    tt.toc()
    print "Done!",

def construct_execution_graph(self):
    # Collect vars separately. Word embeddings are not used here.
    eucl_vars = []
    hyp_vars = []

    ################## word embeddings ###################
    # Initialize word embeddings close to 0, to have average norm equal to word_init_avg_norm.
    maxval = (3. * (word_init_avg_norm**2) / (2. * word_dim))**(1. / 3)
    initializer = tf.random_uniform_initializer(minval=-maxval, maxval=maxval, dtype=dtype)
    self.embeddings = tf.get_variable('embeddings',
                                      dtype=dtype,
                                      shape=[len(self.word_to_id), word_dim],
                                      initializer=initializer)
    if inputs_geom == 'eucl':
        eucl_vars += [self.embeddings]

    ################## RNNs for sentence embeddings ###################
    if cell_type == 'TFrnn':
        assert sent_geom == 'eucl'
        cell_class = lambda h_dim: tf.contrib.rnn.BasicRNNCell(h_dim)
    elif cell_type == 'TFgru':
        assert sent_geom == 'eucl'
        cell_class = lambda h_dim: tf.contrib.rnn.GRUCell(h_dim)
    elif cell_type == 'TFlstm':
        assert sent_geom == 'eucl'
        cell_class = lambda h_dim: tf.contrib.rnn.BasicLSTMCell(h_dim)
    elif cell_type == 'rnn' and sent_geom == 'eucl':
        cell_class = lambda h_dim: rnn_impl.EuclRNN(h_dim, dtype=dtype)
    elif cell_type == 'gru' and sent_geom == 'eucl':
        cell_class = lambda h_dim: rnn_impl.EuclGRU(h_dim, dtype=dtype)
    elif cell_type == 'rnn' and sent_geom == 'hyp':
        cell_class = lambda h_dim: rnn_impl.HypRNN(num_units=h_dim,
                                                   inputs_geom=inputs_geom,
                                                   bias_geom=bias_geom,
                                                   c_val=c_val,
                                                   non_lin=cell_non_lin,
                                                   fix_biases=fix_biases,
                                                   fix_matrices=fix_matrices,
                                                   matrices_init_eye=matrices_init_eye,
                                                   dtype=dtype)
    elif cell_type == 'gru' and sent_geom == 'hyp':
        cell_class = lambda h_dim: rnn_impl.HypGRU(num_units=h_dim,
                                                   inputs_geom=inputs_geom,
                                                   bias_geom=bias_geom,
                                                   c_val=c_val,
                                                   non_lin=cell_non_lin,
                                                   fix_biases=fix_biases,
                                                   fix_matrices=fix_matrices,
                                                   matrices_init_eye=matrices_init_eye,
                                                   dtype=dtype)
    else:
        logger.error('Not valid cell type: %s and sent_geom %s' % (cell_type, sent_geom))
        exit()

    # RNN 1
    with tf.variable_scope(cell_type + '1'):
        word_embeddings_1 = tf.nn.embedding_lookup(self.embeddings, self.word_ids_1)  # bs x num_w_s1 x dim
        cell_1 = cell_class(hidden_dim)
        initial_state_1 = cell_1.zero_state(batch_size, dtype)
        outputs_1, state_1 = tf.nn.dynamic_rnn(cell=cell_1,
                                               inputs=word_embeddings_1,
                                               dtype=dtype,
                                               initial_state=initial_state_1,
                                               sequence_length=self.num_words_1)
        if cell_type == 'TFlstm':
            self.sent_1 = state_1[1]
        else:
            self.sent_1 = state_1
        sent1_norm = util.tf_norm(self.sent_1)

    # RNN 2
    with tf.variable_scope(cell_type + '2'):
        word_embeddings_2 = tf.nn.embedding_lookup(self.embeddings, self.word_ids_2)
        # tf.summary.scalar('word_emb2', tf.reduce_mean(tf.norm(word_embeddings_2, axis=2)))
        cell_2 = cell_class(hidden_dim)
        initial_state_2 = cell_2.zero_state(batch_size, dtype)
        outputs_2, state_2 = tf.nn.dynamic_rnn(cell=cell_2,
                                               inputs=word_embeddings_2,
                                               dtype=dtype,
                                               initial_state=initial_state_2,
                                               sequence_length=self.num_words_2)
        if cell_type == 'TFlstm':
            self.sent_2 = state_2[1]
        else:
            self.sent_2 = state_2
        sent2_norm = util.tf_norm(self.sent_2)

    tf.summary.scalar('RNN/word_emb1', tf.reduce_mean(tf.norm(word_embeddings_1, axis=2)))
    tf.summary.scalar('RNN/sent1', tf.reduce_mean(sent1_norm))
    tf.summary.scalar('RNN/sent2', tf.reduce_mean(sent2_norm))

    eucl_vars += cell_1.eucl_vars + cell_2.eucl_vars
    if sent_geom == 'hyp':
        hyp_vars += cell_1.hyp_vars + cell_2.hyp_vars

    ## Compute d(s1, s2)
    if sent_geom == 'eucl':
        d_sq_s1_s2 = util.tf_euclid_dist_sq(self.sent_1, self.sent_2)
    else:
        d_sq_s1_s2 = util.tf_poinc_dist_sq(self.sent_1, self.sent_2, c=c_val)

    ##### Some summaries:
    # For summaries and debugging, we need these:
    pos_labels = tf.reshape(tf.cast(self.label_placeholder, tf.float64), [-1, 1])
    neg_labels = 1. - pos_labels
    weights_pos_labels = pos_labels / tf.reduce_sum(pos_labels)
    weights_neg_labels = neg_labels / tf.reduce_sum(neg_labels)

    ################## first feed forward layer ###################
    # Define variables for the first feed-forward layer: W1 * s1 + W2 * s2 + b + bd * d(s1,s2)
    W_ff_s1 = tf.get_variable('W_ff_s1',
                              dtype=dtype,
                              shape=[hidden_dim, before_mlr_dim],
                              initializer=tf.contrib.layers.xavier_initializer())
    W_ff_s2 = tf.get_variable('W_ff_s2',
                              dtype=dtype,
                              shape=[hidden_dim, before_mlr_dim],
                              initializer=tf.contrib.layers.xavier_initializer())
    b_ff = tf.get_variable('b_ff',
                           dtype=dtype,
                           shape=[1, before_mlr_dim],
                           initializer=tf.constant_initializer(0.0))
    b_ff_d = tf.get_variable('b_ff_d',
                             dtype=dtype,
                             shape=[1, before_mlr_dim],
                             initializer=tf.constant_initializer(0.0))

    eucl_vars += [W_ff_s1, W_ff_s2]
    if ffnn_geom == 'eucl' or bias_geom == 'eucl':
        eucl_vars += [b_ff]
        if additional_features == 'dsq':
            eucl_vars += [b_ff_d]
    else:
        hyp_vars += [b_ff]
        if additional_features == 'dsq':
            hyp_vars += [b_ff_d]

    if ffnn_geom == 'eucl' and sent_geom == 'hyp':
        # Sentence embeddings are Euclidean after log, except the proper distance (Eucl or hyp) is kept!
        self.sent_1 = util.tf_log_map_zero(self.sent_1, c_val)
        self.sent_2 = util.tf_log_map_zero(self.sent_2, c_val)

    ####### Build output_ffnn #######
    if ffnn_geom == 'eucl':
        output_ffnn = tf.matmul(self.sent_1, W_ff_s1) + tf.matmul(self.sent_2, W_ff_s2) + b_ff
        if additional_features == 'dsq':  # [u, v, d(u,v)^2]
            output_ffnn = output_ffnn + d_sq_s1_s2 * b_ff_d
    else:
        assert sent_geom == 'hyp'
        ffnn_s1 = util.tf_mob_mat_mul(W_ff_s1, self.sent_1, c_val)
        ffnn_s2 = util.tf_mob_mat_mul(W_ff_s2, self.sent_2, c_val)
        output_ffnn = util.tf_mob_add(ffnn_s1, ffnn_s2, c_val)

        hyp_b_ff = b_ff
        if bias_geom == 'eucl':
            hyp_b_ff = util.tf_exp_map_zero(b_ff, c_val)
        output_ffnn = util.tf_mob_add(output_ffnn, hyp_b_ff, c_val)

        if additional_features == 'dsq':  # [u, v, d(u,v)^2]
            hyp_b_ff_d = b_ff_d
            if bias_geom == 'eucl':
                hyp_b_ff_d = util.tf_exp_map_zero(b_ff_d, c_val)
            output_ffnn = util.tf_mob_add(output_ffnn,
                                          util.tf_mob_scalar_mul(d_sq_s1_s2, hyp_b_ff_d, c_val),
                                          c_val)

    if ffnn_geom == 'eucl':
        output_ffnn = util.tf_eucl_non_lin(output_ffnn, non_lin=ffnn_non_lin)
    else:
        output_ffnn = util.tf_hyp_non_lin(output_ffnn,
                                          non_lin=ffnn_non_lin,
                                          hyp_output=(mlr_geom == 'hyp' and dropout == 1.0),
                                          c=c_val)

    # Mobius dropout
    if dropout < 1.0:
        # If we are here, then output_ffnn should be Euclidean.
        output_ffnn = tf.nn.dropout(output_ffnn, keep_prob=self.dropout_placeholder)
        if mlr_geom == 'hyp':
            output_ffnn = util.tf_exp_map_zero(output_ffnn, c_val)

    ################## MLR ###################
    # output_ffnn is batch_size x before_mlr_dim
    A_mlr = []
    P_mlr = []
    logits_list = []
    for cl in range(num_classes):
        A_mlr.append(tf.get_variable('A_mlr' + str(cl),
                                     dtype=dtype,
                                     shape=[1, before_mlr_dim],
                                     initializer=tf.contrib.layers.xavier_initializer()))
        eucl_vars += [A_mlr[cl]]

        P_mlr.append(tf.get_variable('P_mlr' + str(cl),
                                     dtype=dtype,
                                     shape=[1, before_mlr_dim],
                                     initializer=tf.constant_initializer(0.0)))

        if mlr_geom == 'eucl':
            eucl_vars += [P_mlr[cl]]
            logits_list.append(tf.reshape(util.tf_dot(-P_mlr[cl] + output_ffnn, A_mlr[cl]), [-1]))
        elif mlr_geom == 'hyp':
            hyp_vars += [P_mlr[cl]]
            minus_p_plus_x = util.tf_mob_add(-P_mlr[cl], output_ffnn, c_val)
            norm_a = util.tf_norm(A_mlr[cl])
            lambda_px = util.tf_lambda_x(minus_p_plus_x, c_val)
            px_dot_a = util.tf_dot(minus_p_plus_x, tf.nn.l2_normalize(A_mlr[cl]))
            logit = 2. / np.sqrt(c_val) * norm_a * tf.asinh(np.sqrt(c_val) * px_dot_a * lambda_px)
            logits_list.append(tf.reshape(logit, [-1]))

    self.logits = tf.stack(logits_list, axis=1)
    self.argmax_idx = tf.argmax(self.logits, axis=1, output_type=tf.int32)

    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.label_placeholder,
                                                       logits=self.logits))
    tf.summary.scalar('classif/unreg_loss', self.loss)

    if reg_beta > 0.0:
        assert num_classes == 2
        distance_regularizer = tf.reduce_mean(
            (tf.cast(self.label_placeholder, dtype=dtype) - 0.5) * d_sq_s1_s2)
        self.loss = self.loss + reg_beta * distance_regularizer

    self.acc = tf.reduce_mean(tf.to_float(tf.equal(self.argmax_idx, self.label_placeholder)))
    tf.summary.scalar('classif/accuracy', self.acc)

    ######################################## OPTIMIZATION ######################################
    all_updates_ops = []

    ###### Update Euclidean parameters using Adam.
    optimizer_euclidean_params = tf.train.AdamOptimizer(learning_rate=1e-3)
    eucl_grads = optimizer_euclidean_params.compute_gradients(self.loss, eucl_vars)
    capped_eucl_gvs = [(tf.clip_by_norm(grad, 1.), var) for grad, var in eucl_grads]  ###### Clip gradients
    all_updates_ops.append(optimizer_euclidean_params.apply_gradients(capped_eucl_gvs))

    ###### Update Hyperbolic parameters, i.e. word embeddings and some biases in our case.
    def rsgd(v, riemannian_g, learning_rate):
        if hyp_opt == 'rsgd':
            return util.tf_exp_map_x(v, -self.burn_in_factor * learning_rate * riemannian_g, c=c_val)
        else:
            # Use approximate RSGD based on a simple retraction.
            updated_v = v - self.burn_in_factor * learning_rate * riemannian_g
            # Projection op after SGD update. Need to make sure embeddings are inside the unit ball.
            return util.tf_project_hyp_vecs(updated_v, c_val)

    if inputs_geom == 'hyp':
        grads_and_indices_hyp_words = tf.gradients(self.loss, self.embeddings)
        grads_hyp_words = grads_and_indices_hyp_words[0].values
        repeating_indices = grads_and_indices_hyp_words[0].indices
        unique_indices, idx_in_repeating_indices = tf.unique(repeating_indices)
        agg_gradients = tf.unsorted_segment_sum(grads_hyp_words,
                                                idx_in_repeating_indices,
                                                tf.shape(unique_indices)[0])
        agg_gradients = tf.clip_by_norm(agg_gradients, 1.)  ######## Clip gradients

        unique_word_emb = tf.nn.embedding_lookup(self.embeddings, unique_indices)  # no repetitions here
        riemannian_rescaling_factor = util.riemannian_gradient_c(unique_word_emb, c=c_val)
        rescaled_gradient = riemannian_rescaling_factor * agg_gradients

        all_updates_ops.append(
            tf.scatter_update(self.embeddings,
                              unique_indices,
                              rsgd(unique_word_emb, rescaled_gradient, lr_words)))  # Updated rarely

    if len(hyp_vars) > 0:
        hyp_grads = tf.gradients(self.loss, hyp_vars)
        capped_hyp_grads = [tf.clip_by_norm(grad, 1.) for grad in hyp_grads]  ###### Clip gradients

        for i in range(len(hyp_vars)):
            riemannian_rescaling_factor = util.riemannian_gradient_c(hyp_vars[i], c=c_val)
            rescaled_gradient = riemannian_rescaling_factor * capped_hyp_grads[i]
            all_updates_ops.append(
                tf.assign(hyp_vars[i], rsgd(hyp_vars[i], rescaled_gradient, lr_ffnn)))  # Updated frequently

    self.all_optimizer_var_updates_op = tf.group(*all_updates_ops)

    self.summary_merged = tf.summary.merge_all()
    self.test_summary_writer = tf.summary.FileWriter(
        os.path.join(root_path, 'tb_28may/' + tensorboard_name + '/'))

def __call__(self, x, is_training=False, filters=None):
    if filters is not None:
        self.output_size = filters

    with tf.variable_scope(self.name):
        # kernel shape
        k = [self.kernel, self.kernel, x.shape[-1], self.output_size] \
            if type(self.kernel) is int \
            else [self.kernel[0], self.kernel[1], x.shape[-1], self.output_size]

        # kernel
        gt = tf.get_variable("_w_gt_2d", k,
                             # initializer=tf.contrib.layers.xavier_initializer(),
                             initializer=tf.truncated_normal_initializer(stddev=0.02),
                             trainable=is_training)
        wt = tf.get_variable("_w_wt_2d", k,
                             # initializer=tf.contrib.layers.xavier_initializer(),
                             initializer=tf.truncated_normal_initializer(stddev=0.02),
                             trainable=is_training)
        mt = tf.get_variable("_w_mt_2d", k,
                             # initializer=tf.contrib.layers.xavier_initializer(),
                             initializer=tf.truncated_normal_initializer(stddev=0.02),
                             trainable=is_training)

        strides = (1, self.stride, self.stride, 1) if type(self.stride) is int \
            else (1, self.stride[0], self.stride[1], 1)

        with tf.variable_scope('nac_w'):
            w = tf.multiply(tf.tanh(wt), tf.sigmoid(mt))
        with tf.variable_scope('simple_nac'):
            a = tf.nn.conv2d(x, w, strides, padding=self.padding)
        with tf.variable_scope('complex_nac'):
            # m = tf.exp(tf.nn.conv2d(tf.log(tf.abs(x) + 1e-10), w, strides, padding=self.padding))
            m = tf.sinh(tf.nn.conv2d(tf.asinh(x), w, strides, padding=self.padding))
        with tf.variable_scope('math_gate'):
            gc = tf.nn.sigmoid(tf.nn.conv2d(x, gt, strides, padding=self.padding))
        with tf.variable_scope('result'):
            x = (gc * a) + ((1 - gc) * m)

        # if self.bias:
        #     b = tf.compat.v1.get_variable('_b_',
        #                                   [1, 1, 1, self.output_size],
        #                                   initializer=tf.constant_initializer(0.0),
        #                                   trainable=is_training)
        #     x += b

        # batch normalization
        if self.bn:
            x = bn(x, is_training=is_training, name="_bn")

        # activation
        if self.act is not None:
            x = self.act(x, name="_act")

        # setup dropout
        if self.dropout > 0 and is_training:
            x = dropout(x, self.dropout, name="_dp")

        print(x)
        return x

def arcsinh(x, alpha=1):
    return tf.asinh(x * alpha) / alpha

def arsinh(x):
    result = tf.asinh(x)
    return result

def _inverse(self, y):
    return tf.sinh(tf.asinh(y) / self.tailweight - self.skewness)

# numpy implementation vs. tensorflow implementation
numpy_per = []
tf_per = []

for array in array_sizes:
    test_matrix = np.random.random((array, array, 2))

    start = time.time()
    test_result = np.arcsinh(test_matrix)
    end = time.time()
    numpy_per.append(end - start)

    tf_input = tf.constant(test_matrix, dtype=tf.float64)
    tf_op = tf.asinh(tf_input)
    with tf.Session() as sess:
        tf_start = time.time()
        # Actually execute the op: timing tf.asinh() alone would only measure graph construction.
        tf_result = sess.run(tf_op)
        tf_end = time.time()
    tf_per.append(tf_end - tf_start)

for i in range(len(array_sizes)):
    print('Size:', array_sizes[i],
          ' numpy performance:', "%.5f" % numpy_per[i],
          ' tensorflow performance:', "%.5f" % tf_per[i])
# print('tensorflow performance :', tf_per)

plt.figure(figsize=(5, 5))
plt.plot(array_sizes, numpy_per, color='green', label='numpy')
plt.plot(array_sizes, tf_per, color='blue', label='tensorflow')
plt.title('Tensorflow vs Numpy : Asinh')
plt.savefig('Asinh.png')

def reashu(t):
    return nn.where(t < 0, 0, asinh(t))

def eta(self):
    return tf.asinh(self.pz / self.pt())

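# In collider kinematics the quantity above is the pseudorapidity. A quick NumPy check of
# the standard identity eta = arcsinh(pz / pT) = -ln(tan(theta / 2)) for a particle with
# polar angle theta (the momentum magnitude cancels out; values below are illustrative):
import numpy as np
theta, p = 0.7, 5.0
pz, pt = p * np.cos(theta), p * np.sin(theta)
print(np.arcsinh(pz / pt), -np.log(np.tan(theta / 2.0)))  # both ~1.0079
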
def cn(x):
    """compressive nonlinearity."""
    return tf.asinh(4. * x) / 4.

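# The factor of 4 gives cn() unit slope at the origin (asinh(4x)/4 ~= x for small x) while
# compressing large inputs roughly logarithmically. A small NumPy illustration:
import numpy as np
cn_np = lambda x: np.arcsinh(4. * x) / 4.
print(cn_np(0.01))   # ~0.0100, near-identity for small inputs
print(cn_np(100.0))  # ~1.671 ~= log(2 * 4 * 100) / 4, strongly compressed
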
def create(labels, embeddings, **kwargs):
    word_vec = embeddings['word']
    char_vec = embeddings['char']
    model = HyperbolicRNNModel()
    model.sess = kwargs.get('sess', tf.Session())

    model.mxlen = kwargs.get('maxs', 100)
    model.maxw = kwargs.get('maxw', 100)

    hsz = int(kwargs['hsz'])
    pdrop = kwargs.get('dropout', 0.5)
    pdrop_in = kwargs.get('dropin', 0.0)
    rnntype = kwargs.get('rnntype', 'blstm')
    print(rnntype)
    layers = kwargs.get('layers', 1)
    model.labels = labels
    model.crf = bool(kwargs.get('crf', False))
    model.crf_mask = bool(kwargs.get('crf_mask', False))
    model.span_type = kwargs.get('span_type')
    model.proj = bool(kwargs.get('proj', False))
    model.feed_input = bool(kwargs.get('feed_input', False))
    model.activation_type = kwargs.get('activation', 'tanh')

    char_dsz = char_vec.dsz
    nc = len(labels)
    model.x = kwargs.get('x', tf.placeholder(tf.int32, [None, model.mxlen], name="x"))
    model.xch = kwargs.get('xch', tf.placeholder(tf.int32, [None, model.mxlen, model.maxw], name="xch"))
    model.y = kwargs.get('y', tf.placeholder(tf.int64, [None, model.mxlen], name="y"))
    model.lengths = kwargs.get('lengths', tf.placeholder(tf.int32, [None], name="lengths"))
    model.pkeep = kwargs.get('pkeep', tf.placeholder(tf.float64, name="pkeep"))
    model.pdrop_value = pdrop
    model.pdropin_value = pdrop_in
    model.word_vocab = {}

    inputs_geom = kwargs.get("inputs_geom", "hyp")
    bias_geom = kwargs.get("bias_geom", "hyp")
    ffnn_geom = kwargs.get("ffnn_geom", "hyp")
    sent_geom = kwargs.get("sent_geom", "hyp")
    mlr_geom = kwargs.get("mlr_geom", "hyp")
    c_val = kwargs.get("c_val", 1.0)
    cell_non_lin = kwargs.get("cell_non_lin", "id")  # "id/relu/tanh/sigmoid."
    ffnn_non_lin = kwargs.get("ffnn_non_lin", "id")
    cell_type = kwargs.get("cell_type", 'rnn')
    lr_words = kwargs.get("lw_words", 0.01)
    lr_ffnn = kwargs.get("lr_ffnn", 0.01)
    optimizer = kwargs.get("optimizer", "rsgd")
    eucl_clip = kwargs.get("eucl_clip", 1.0)
    hyp_clip = kwargs.get("hyp_clip", 1.0)
    before_mlr_dim = kwargs.get("before_mlr_dim", nc)
    learn_softmax = kwargs.get("learn_softmax", True)
    batch_sz = 10
    print("C_val:", c_val)

    eucl_vars = []
    hyp_vars = []

    if word_vec is not None:
        model.word_vocab = word_vec.vocab
    # model.char_vocab = char_vec.vocab

    seed = np.random.randint(10e8)

    if word_vec is not None:
        # word_embeddings = embed(model.x, len(word_vec.vocab), word_vec.dsz,
        #                         initializer=tf.constant_initializer(word_vec.weights, dtype=tf.float32, verify_shape=True))
        with tf.variable_scope("LUT"):
            W = tf.get_variable("W",
                                dtype=tf.float64,
                                initializer=tf.constant_initializer(word_vec.weights,
                                                                    dtype=tf.float64,
                                                                    verify_shape=True),
                                shape=[len(word_vec.vocab), word_vec.dsz],
                                trainable=True)
            # e0 = tf.scatter_update(W, tf.constant(0, dtype=tf.int32, shape=[1]), tf.zeros(shape=[1, word_vec.dsz]))
            # with tf.control_dependencies([W]):
            word_embeddings = tf.nn.embedding_lookup(W, model.x)

    # Wch = tf.Variable(tf.constant(char_vec.weights, dtype=tf.float32), name="Wch")
    # ce0 = tf.scatter_update(Wch, tf.constant(0, dtype=tf.int32, shape=[1]), tf.zeros(shape=[1, char_dsz]))
    # word_char, _ = pool_chars(model.xch, Wch, ce0, char_dsz, **kwargs)
    # joint = word_char if word_vec is None else tf.concat(values=[word_embeddings, word_char], axis=2)
    # word_embeddings = tf.Print(word_embeddings, [word_embeddings], message="embeddings")

    embedseq = word_embeddings
    # embedseq = tf.nn.dropout(word_embeddings, model.pkeep)
    # if (mlr_geom == 'hyp'):
    #     embedseq = util.tf_exp_map_zero(embedseq, c_val)

    if cell_type == 'rnn' and sent_geom == 'eucl':
        cell_class = lambda h_dim: tf.contrib.rnn.BasicRNNCell(h_dim)
    if cell_type == 'rnn' and sent_geom == 'hyp':
        cell_class = lambda h_dim, layer: LorentzRNN(num_units=h_dim,
                                                     inputs_geom=inputs_geom,
                                                     bias_geom=bias_geom,
                                                     c_val=c_val,
                                                     non_lin=cell_non_lin,
                                                     fix_biases=False,
                                                     fix_matrices=False,
                                                     matrices_init_eye=False,
                                                     dtype=tf.float64,
                                                     layer=layer)
    # elif cell_type == 'gru' and sent_geom == 'hyp':
    #     cell_class = lambda h_dim, layer: rnn_impl.HypGRU(num_units=h_dim,
    #                                                       inputs_geom=inputs_geom, bias_geom=bias_geom,
    #                                                       c_val=c_val, non_lin=cell_non_lin,
    #                                                       fix_biases=False, fix_matrices=False,
    #                                                       matrices_init_eye=False, dtype=tf.float64,
    #                                                       layer=layer)
    # elif cell_type == 'lstm' and sent_geom == 'hyp':
    #     cell_class = lambda h_dim, layer: rnn_impl.HypLSTM(num_units=h_dim,
    #                                                        inputs_geom=inputs_geom, bias_geom=bias_geom,
    #                                                        c_val=c_val, non_lin=cell_non_lin,
    #                                                        fix_biases=False, fix_matrices=False,
    #                                                        matrices_init_eye=False, dtype=tf.float64,
    #                                                        layer=layer)

    rnnout = embedseq
    for i in range(layers):
        with tf.variable_scope('rnnLayers', reuse=tf.AUTO_REUSE):
            if rnntype == 'rnn':
                cell = cell_class(hsz, i)
                initial_state = cell.zero_state(batch_sz, tf.float64)
                # rnnout = tf.contrib.rnn.DropoutWrapper(cell)
                rnnout, state = tf.nn.dynamic_rnn(cell, rnnout,
                                                  sequence_length=model.lengths,
                                                  initial_state=initial_state,
                                                  dtype=tf.float64)
                eucl_vars += cell.eucl_vars
                if sent_geom == 'hyp':
                    hyp_vars += cell.hyp_vars
            elif rnntype == 'bi':
                cell_1 = cell_class(hsz, i)
                cell_2 = cell_class(hsz, i)
                init_fw = cell_1.zero_state(batch_sz, tf.float64)
                init_bw = cell_2.zero_state(batch_sz, tf.float64)
                rnnout, state = tf.nn.bidirectional_dynamic_rnn(cell_1, cell_2, rnnout,
                                                                initial_state_fw=init_fw,
                                                                initial_state_bw=init_bw,
                                                                sequence_length=model.lengths,
                                                                dtype=tf.float64)
                rnnout = tf.concat(axis=2, values=rnnout)
                eucl_vars += cell_1.eucl_vars + cell_2.eucl_vars
                if sent_geom == 'hyp':
                    hyp_vars += cell_1.hyp_vars + cell_2.hyp_vars
            else:
                cell = cell_class(hsz)
                # rnnout = tf.contrib.rnn.DropoutWrapper(cell)
                rnnout, state = tf.nn.dynamic_rnn(cell, rnnout,
                                                  sequence_length=model.lengths,
                                                  dtype=tf.float64)
                eucl_vars += cell.eucl_vars
                if sent_geom == 'hyp':
                    hyp_vars += cell.hyp_vars

    # rnnout = tf.Print(rnnout, [rnnout], message="rnnout")
    tf.summary.histogram('RNN/rnnout', rnnout)

    # Converts seq to tensor, back to (B,T,W)
    hout = rnnout.get_shape()[-1]
    print(rnnout.get_shape())

    # Flatten from [B x T x H] -> [BT x H]
    with tf.variable_scope("fc"):
        rnnout_bt_x_h = tf.reshape(rnnout, [-1, hout])
        # rnnout_bt_x_h = tf.Print(rnnout_bt_x_h, [rnnout_bt_x_h], message="rnnout_bt_x_h")

        ################## first feed forward layer ###################
        # Define variables for the first feed-forward layer: W1 * s1 + W2 * s2 + b + bd * d(s1,s2)
        W_ff_s1 = tf.get_variable('W_ff_s1',
                                  dtype=tf.float64,
                                  shape=[hout, before_mlr_dim],  # 400, 20 -- 20 number of classes
                                  initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float64))
        tf.summary.histogram("W_ff_s1", W_ff_s1)
        # b_ff = tf.get_variable('b_ff',
        #                        dtype=tf.float64,
        #                        shape=[1, before_mlr_dim],
        #                        initializer=tf.constant_initializer(0.0))
        # TODO(MB): ffn should be in hyperbolic space, no?
        eucl_vars += [W_ff_s1]
        # hyp_vars += [b_ff]

        #### treat W as an update in tangent space
        # ffnn_s1 = rnnout_bt_x_h + W_ff_s1 + b_ff
        # cheat for now. i don't know how to multiply these together first
        ffnn_s1 = lorentz.tf_mink_dot_matrix(rnnout_bt_x_h, tf.transpose(W_ff_s1))
        # ffnn_s1 = W_ff_s1 + dotp * rnnout_bt_x_h
        #### embed back into minkowski space
        # ffnn_s1 = lorentz.tf_exp_map_x(rnnout_bt_x_h, ffnn_s1, c_val)
        # print('ffnn', ffnn_s1.get_shape())
        # tf.summary.histogram("ffnn_s1", ffnn_s1)

        output_ffnn = util.tf_hyp_non_lin(ffnn_s1,
                                          non_lin=ffnn_non_lin,
                                          hyp_output=True,  # (mlr_geom == 'hyp'),
                                          c=c_val)
        tf.summary.histogram("output_ffnn", output_ffnn)
        # output_ffnn = tf.Print(output_ffnn, [output_ffnn], message="output_ffnn")
        # output_ffnn = dotp

    ################## MLR ###################
    # output_ffnn is batch_size x before_mlr_dim
    if not learn_softmax:
        probs = output_ffnn
    else:
        print("learning softmax in hyperbolic space")
        A_mlr = []
        P_mlr = []
        logits_list = []
        dtype = tf.float64
        print('output shape', output_ffnn.get_shape())
        with tf.variable_scope("hyper_softmax"):
            for cl in range(nc):
                with tf.variable_scope('mlp'):
                    A_mlr.append(tf.get_variable('A_mlr' + str(cl),
                                                 dtype=dtype,
                                                 shape=[1, before_mlr_dim],
                                                 initializer=tf.contrib.layers.xavier_initializer()))
                    eucl_vars += [A_mlr[cl]]
                    P_mlr.append(tf.get_variable('P_mlr' + str(cl),
                                                 dtype=dtype,
                                                 shape=[1, before_mlr_dim],
                                                 initializer=tf.constant_initializer(0.0)))
                    if mlr_geom == 'eucl':
                        eucl_vars += [P_mlr[cl]]
                        logits_list.append(
                            tf.reshape(util.tf_dot(-P_mlr[cl] + output_ffnn, A_mlr[cl]), [-1]))
                    elif mlr_geom == 'hyp':
                        hyp_vars += [P_mlr[cl]]
                        minus_p_plus_x = util.tf_mob_add(-P_mlr[cl], output_ffnn, c_val)
                        norm_a = util.tf_norm(A_mlr[cl])
                        lambda_px = util.tf_lambda_x(minus_p_plus_x, c_val)
                        # below -- P+X is a [10, 20] tensor. A_mlr is also [10, 20]. px_dot_a is [10, 1]
                        px_dot_a = util.tf_dot(minus_p_plus_x, tf.nn.l2_normalize(A_mlr[cl]))
                        logit = 2. / np.sqrt(c_val) * norm_a * tf.asinh(
                            np.sqrt(c_val) * px_dot_a * lambda_px)
                        logits_list.append(logit)

            probs = tf.stack(logits_list, axis=1)
            print("probs shape", probs.get_shape())

    model.probs = tf.reshape(probs, [-1, model.mxlen, nc])
    print("reshaped probs", model.probs.get_shape())
    tf.summary.histogram("probs", model.probs)
    model.best = tf.argmax(model.probs, 2)
    model.loss = model.create_loss()
    # model.best = tf.argmax(model.probs, axis=1, output_type=tf.int32)

    ######################################## OPTIMIZATION ######################################
    all_updates_ops = []

    ###### Update Euclidean parameters using Adam.
    optimizer_euclidean_params = tf.train.AdamOptimizer(learning_rate=1e-3)
    eucl_grads = optimizer_euclidean_params.compute_gradients(model.loss, eucl_vars)
    capped_eucl_gvs = [(tf.clip_by_norm(grad, eucl_clip), var) for grad, var in eucl_grads]  ###### Clip gradients
    all_updates_ops.append(optimizer_euclidean_params.apply_gradients(capped_eucl_gvs))

    ###### Update Hyperbolic parameters, i.e. word embeddings and some biases in our case.
    def rsgd(v, riemannian_g, learning_rate):
        if optimizer == 'rsgd':
            return lorentz.tf_exp_map_x(v, -model.burn_in_factor * learning_rate * riemannian_g, c=c_val)
        else:
            # Use approximate RSGD based on a simple retraction.
            updated_v = v - model.burn_in_factor * learning_rate * riemannian_g
            # Projection op after SGD update. Need to make sure embeddings are inside the unit ball.
            return util.tf_project_hyp_vecs(updated_v, c_val)

    if inputs_geom == 'hyp':
        grads_and_indices_hyp_words = tf.gradients(model.loss, W)
        grads_hyp_words = grads_and_indices_hyp_words[0].values
        # grads_hyp_words = tf.Print(grads_hyp_words, [grads_hyp_words], message="grads_hyp_words")
        repeating_indices = grads_and_indices_hyp_words[0].indices
        unique_indices, idx_in_repeating_indices = tf.unique(repeating_indices)
        # unique_indices = tf.Print(unique_indices, [unique_indices], message="unique_indices")
        # idx_in_repeating_indices = tf.Print(idx_in_repeating_indices, [idx_in_repeating_indices], message="idx_in_repeating_indices")
        agg_gradients = tf.unsorted_segment_sum(grads_hyp_words,
                                                idx_in_repeating_indices,
                                                tf.shape(unique_indices)[0])
        agg_gradients = tf.clip_by_norm(agg_gradients, hyp_clip)  ######## Clip gradients
        # agg_gradients = tf.Print(agg_gradients, [agg_gradients], message="agg_gradients")

        unique_word_emb = tf.nn.embedding_lookup(W, unique_indices)  # no repetitions here
        # unique_word_emb = tf.Print(unique_word_emb, [unique_word_emb], message="unique_word_emb")
        riemannian_rescaling_factor = util.riemannian_gradient_c(unique_word_emb, c=c_val)
        # riemannian_rescaling_factor = tf.Print(riemannian_rescaling_factor, [riemannian_rescaling_factor], message="rescl factor")
        rescaled_gradient = riemannian_rescaling_factor * agg_gradients
        # rescaled_gradient = tf.Print(rescaled_gradient, [rescaled_gradient], message="rescl gradient")

        all_updates_ops.append(
            tf.scatter_update(W,
                              unique_indices,
                              rsgd(unique_word_emb, rescaled_gradient, lr_words)))  # Updated rarely

    if len(hyp_vars) > 0:
        hyp_grads = tf.gradients(model.loss, hyp_vars)
        capped_hyp_grads = [tf.clip_by_norm(grad, hyp_clip) for grad in hyp_grads]  ###### Clip gradients

        for i in range(len(hyp_vars)):
            riemannian_rescaling_factor = util.riemannian_gradient_c(hyp_vars[i], c=c_val)
            rescaled_gradient = riemannian_rescaling_factor * capped_hyp_grads[i]
            all_updates_ops.append(
                tf.assign(hyp_vars[i], rsgd(hyp_vars[i], rescaled_gradient, lr_ffnn)))  # Updated frequently

    model.all_optimizer_var_updates_op = tf.group(*all_updates_ops)
    print("all ops: ", model.all_optimizer_var_updates_op)

    model.summary_merged = tf.summary.merge_all()
    model.test_summary_writer = tf.summary.FileWriter('./runs/hyper/' + str(os.getpid()),
                                                      model.sess.graph)
    return model

def _forward(self, x):
    return tf.sinh((tf.asinh(x) + self.skewness) * self.tailweight)

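# The _forward/_inverse pair above implements the sinh-arcsinh transform; a quick NumPy
# round-trip check with illustrative skewness/tailweight values (not tied to any class):
import numpy as np
skewness, tailweight = 0.5, 1.5
x = np.linspace(-3.0, 3.0, 7)
y = np.sinh((np.arcsinh(x) + skewness) * tailweight)    # forward
x_rec = np.sinh(np.arcsinh(y) / tailweight - skewness)  # inverse
print(np.allclose(x, x_rec))  # True
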
def stash_old(x, lamb=1.1613326990732873, alpha=0.6521334159737763):
    return _tf.where(x <= 0.0,
                     2.0 * _tf.tanh(alpha * x),
                     lamb * _tf.asinh(2.0 * alpha * x / lamb))

def stash(x, lamb=1.1613855392326946, alpha=0.6520042387583171):
    return _tf.where(x <= 0.0,
                     2.0 * _tf.tanh(alpha * x),
                     lamb * _tf.asinh(2.0 * alpha * x / lamb))

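# Both stash variants splice a tanh branch (x <= 0) onto an asinh branch (x > 0). For any
# lamb and alpha the two branches meet at x = 0 with the same value (0) and slope (2*alpha),
# so the activation is continuous and smooth there; the specific lamb/alpha constants
# presumably come from a separate self-normalizing fixed-point condition not shown here.
import numpy as np
lamb, alpha = 1.1613855392326946, 0.6520042387583171
eps = 1e-6
left = lambda x: 2.0 * np.tanh(alpha * x)
right = lambda x: lamb * np.arcsinh(2.0 * alpha * x / lamb)
print(left(0.0), right(0.0))                      # both 0.0
print((left(eps) - left(-eps)) / (2 * eps),
      (right(eps) - right(-eps)) / (2 * eps))     # both ~1.304 = 2 * alpha
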
# network parameter : biases #
ml_p_b1 = tf.Variable(tf.zeros([ml_h_hidden]))
ml_p_b2 = tf.Variable(tf.zeros([ml_h_input]))

##
## script - network topology
##

# network topology : input layer #
ml_g_input = tf.placeholder(tf.float32, [None, ml_h_input])

# network topology : hidden layer #
ml_g_hidden = tf.nn.relu(tf.add(tf.matmul(ml_g_input, ml_p_w1), ml_p_b1))

# network topology : output layer #
ml_g_output = tf.asinh(tf.add(tf.matmul(ml_g_hidden, ml_p_w2), ml_p_b2))

##
## script - network sub-topology
##

# network topology : input layer #
ml_s1_input = tf.placeholder(tf.float32, [None, ml_h_input])

# network topology : output layer #
ml_s1_output = tf.nn.relu(tf.add(tf.matmul(ml_s1_input, ml_p_w1), ml_p_b1))

# network topology : input layer #
ml_s2_input = tf.placeholder(tf.float32, [None, ml_h_hidden])

# network topology : output layer #

import tensorflow as tf

sess = tf.InteractiveSession()
t = tf.constant([1.8, 2.2])
target = tf.asinh(t).eval()
print(target)

def call(self, inputs):
    self.call_weights()
    if (not isinstance(inputs, ed.RandomVariable) and
            not isinstance(self.kernel, ed.RandomVariable) and
            not isinstance(self.bias, ed.RandomVariable)):
        return super(DenseDVI, self).call(inputs)
    inputs_mean, inputs_variance, inputs_covariance = get_moments(inputs)
    kernel_mean, kernel_variance, _ = get_moments(self.kernel)
    if self.use_bias:
        bias_mean, _, bias_covariance = get_moments(self.bias)

    # E[outputs] = E[inputs] * E[kernel] + E[bias]
    mean = tf.tensordot(inputs_mean, kernel_mean, [[-1], [0]])
    if self.use_bias:
        mean = tf.nn.bias_add(mean, bias_mean)

    # Cov = E[inputs**2] Cov(kernel) + E[W]^T Cov(inputs) E[W] + Cov(bias)
    # For first term, assume Cov(kernel) = 0 on off-diagonals so we only
    # compute diagonal term.
    covariance_diag = tf.tensordot(inputs_variance + inputs_mean**2,
                                   kernel_variance, [[-1], [0]])
    # Compute quadratic form E[W]^T Cov E[W] from right-to-left. First is
    # [..., features, features], [features, units] -> [..., features, units].
    cov_w = tf.tensordot(inputs_covariance, kernel_mean, [[-1], [0]])
    # Next is [..., features, units], [features, units] -> [..., units, units].
    w_cov_w = tf.tensordot(cov_w, kernel_mean, [[-2], [0]])
    covariance = w_cov_w
    if self.use_bias:
        covariance += bias_covariance
    covariance = tf.matrix_set_diag(
        covariance, tf.matrix_diag_part(covariance) + covariance_diag)

    if self.activation in (tf.keras.activations.relu, tf.nn.relu):
        # Compute activation's moments with variable names from Wu et al. (2018).
        variance = tf.matrix_diag_part(covariance)
        scale = tf.sqrt(variance)
        mu = mean / (scale + tf.keras.backend.epsilon())
        mean = scale * soft_relu(mu)

        pairwise_variances = (tf.expand_dims(variance, -1) *
                              tf.expand_dims(variance, -2))  # [..., units, units]
        rho = covariance / tf.sqrt(pairwise_variances + tf.keras.backend.epsilon())
        rho = tf.clip_by_value(rho,
                               -1. / (1. + tf.keras.backend.epsilon()),
                               1. / (1. + tf.keras.backend.epsilon()))
        s = covariance / (rho + tf.keras.backend.epsilon())
        mu1 = tf.expand_dims(mu, -1)  # [..., units, 1]
        mu2 = tf.matrix_transpose(mu1)  # [..., 1, units]
        a = (soft_relu(mu1) * soft_relu(mu2) +
             rho * tfp.distributions.Normal(0., 1.).cdf(mu1) *
             tfp.distributions.Normal(0., 1.).cdf(mu2))
        gh = tf.asinh(rho)
        bar_rho = tf.sqrt(1. - rho**2)
        gr = gh + rho / (1. + bar_rho)
        # Include numerically stable versions of gr and rho when multiplying or
        # dividing them. The sign of gr*rho and rho/gr is always positive.
        safe_gr = tf.abs(gr) + 0.5 * tf.keras.backend.epsilon()
        safe_rho = tf.abs(rho) + tf.keras.backend.epsilon()
        exp_negative_q = gr / (2. * math.pi) * tf.exp(
            -safe_rho / (2. * safe_gr * (1 + bar_rho)) +
            (gh - rho) / (safe_gr * safe_rho) * mu1 * mu2)
        covariance = s * (a + exp_negative_q)
    elif self.activation not in (tf.keras.activations.linear, None):
        raise NotImplementedError('Activation is {}. Deterministic variational '
                                  'inference is only available if activation is '
                                  'ReLU or None.'.format(self.activation))

    return ed.MultivariateNormalFullCovariance(mean, covariance)

def ashlu(t):
    return nn.where(t < 0, asinh(t), t)

def self_normalizing_asinh(x):
    return 1.256734802399369 * tf.asinh(x)

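# The scaling constant presumably makes the activation variance-preserving for standard
# normal inputs, i.e. it is ~ 1 / sqrt(Var[asinh(Z)]) with Z ~ N(0, 1). A quick Monte
# Carlo check of that reading (an assumption, not stated in the original snippet):
import numpy as np
z = np.random.default_rng(0).standard_normal(1_000_000)
print(np.var(1.256734802399369 * np.arcsinh(z)))  # ~1.0
print(1.0 / np.sqrt(np.var(np.arcsinh(z))))       # ~1.2567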