def copy_variable_ref_to_graph(input_graph, output_graph, var_ref, init_value, scope=''):
    """Create a copy of `var_ref` in `output_graph`, seeded with `init_value`.

    Args:
        input_graph: Graph whose collections are searched for `var_ref`.
        output_graph: Graph in which the new `Variable` is constructed.
        var_ref: Source variable. Its name (the part before the first ':')
            and its collection memberships are carried over.
        init_value: Initial value of the new variable; its `.shape` is also
            set on the new variable.
        scope: Optional prefix. When non-empty it is prepended (with '/') to
            the variable name and to every collection key except
            GLOBAL_VARIABLES and TRAINABLE_VARIABLES.

    Returns:
        The newly created `Variable`.
    """
    # Variable names look like "<op_name>:<index>"; keep only the op name.
    base_name = var_ref.name[:var_ref.name.index(':')]
    new_name = base_name if scope == '' else scope + '/' + base_name

    # These two keys are never prefixed with the scope.
    unscoped_keys = (ops.GraphKeys.GLOBAL_VARIABLES,
                     ops.GraphKeys.TRAINABLE_VARIABLES)
    collections = []
    for key, members in input_graph._collections.items():
        if var_ref not in members:
            continue
        if scope == '' or key in unscoped_keys:
            collections.append(key)
        else:
            collections.append(scope + '/' + key)

    trainable_vars = input_graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
    trainable = var_ref in trainable_vars

    with output_graph.as_default():
        new_var = Variable(init_value,
                           trainable,
                           name=new_name,
                           collections=collections,
                           validate_shape=False)
        # validate_shape=False leaves the shape unset, so apply it explicitly.
        new_var.set_shape(init_value.shape)
    return new_var
def add_variable_to_graph(output_graph, var_name, init_value, trainable=True, collections=[], scope=''):
    """Create a new `Variable` named `var_name` inside `output_graph`.

    Args:
        output_graph: Graph in which the variable is constructed.
        var_name: Name of the variable (without any scope prefix).
        init_value: Initial value; its `.shape` is also set on the variable.
        trainable: Forwarded to `Variable` as its trainable flag.
        collections: Collection keys forwarded to `Variable`.
        scope: Optional prefix; when non-empty the variable is named
            "<scope>/<var_name>".

    Returns:
        The newly created `Variable`.
    """
    # NOTE: the default `collections` list is shared between calls; this
    # function only forwards it and never modifies it.
    new_name = var_name if scope == '' else scope + '/' + var_name

    with output_graph.as_default():
        new_var = Variable(init_value,
                           trainable,
                           name=new_name,
                           collections=collections,
                           validate_shape=False)
        # validate_shape=False leaves the shape unset, so apply it explicitly.
        new_var.set_shape(init_value.shape)
    return new_var
def copy_variable_to_graph(org_instance, to_graph, scope=""):
    """Given a `Variable` instance from one `Graph`, initializes and returns
    a copy of it from another `Graph`, under the specified scope
    (default `""`).

    Args:
        org_instance: A `Variable` from some `Graph`.
        to_graph: The `Graph` to copy the `Variable` to.
        scope: A scope for the new `Variable` (default `""`).

    Returns:
        The copied `Variable` from `to_graph`.

    Raises:
        TypeError: If `org_instance` is not a `Variable`.
    """
    if not isinstance(org_instance, Variable):
        raise TypeError(str(org_instance) + " is not a Variable")

    # The name of the new variable: the original name up to the first ':',
    # prefixed with the scope when one is given.
    base_name = org_instance.name[:org_instance.name.index(':')]
    if scope != "":
        new_name = scope + '/' + base_name
    else:
        new_name = base_name

    # Get the collections that the new instance needs to be added to.
    # The new collections will also be a part of the given scope,
    # except the special ones required for variable initialization and
    # training.
    collections = []
    for name, collection in org_instance.graph._collections.items():
        if org_instance in collection:
            if (name == ops.GraphKeys.VARIABLES or
                    name == ops.GraphKeys.TRAINABLE_VARIABLES or
                    scope == ''):
                collections.append(name)
            else:
                collections.append(scope + '/' + name)

    # See if it is trainable.
    trainable = (org_instance in org_instance.graph.get_collection(
        ops.GraphKeys.TRAINABLE_VARIABLES))

    # Get the initial value by evaluating it in the variable's own graph.
    # The session exists only for this one read, so close it even when
    # `run` raises; otherwise each call leaks a session.
    with org_instance.graph.as_default():
        temp_session = Session()
        try:
            init_value = temp_session.run(org_instance.initialized_value())
        finally:
            temp_session.close()

    # Initialize the new variable in the destination graph.
    with to_graph.as_default():
        new_var = Variable(init_value,
                           trainable,
                           name=new_name,
                           collections=collections,
                           validate_shape=False)

    return new_var
def _get_paritioned_var_info(ori_vars,  # pylint: disable-msg=too-many-locals
                             new_vars,
                             var_group,
                             update_op_scopes,
                             partition_config):
    """Collect the construction info of every partitioned variable.

    Args:
        ori_vars: Original (unsplit) variables, looked up by `op.name`.
        new_vars: Split variables, looked up by `op.name`.
        var_group: Mapping from an original variable's op name to the op
            names of the variables it was split into.
        update_op_scopes: Name prefixes that are stripped when deriving the
            key used to look up `partition_config`.
        partition_config: Mapping from that derived name to a config object
            exposing `_partition_list`.

    Returns:
        A dict keyed by the original variable's `name`. Each value is a dict
        with the keys "name", "shape", "dtype", "var_list" and "partitions".

    Raises:
        ValueError: If the TensorFlow major version is neither "1" nor "2"
            (raised while processing the first entry of `var_group`).

    Side effects:
        When any split variable has no save-slice info, "var_list" is sorted
        by variable name in place and slice info is set on its variables.
    """
    ori_by_op_name = {var.op.name: var for var in ori_vars}
    new_by_op_name = {var.op.name: var for var in new_vars}
    tf_major = versions.VERSION.split('.')[0]

    result = {}
    for ori_op_name, split_names in var_group.items():
        ori_var = ori_by_op_name[ori_op_name]

        # Derive the key under which this variable's partition config lives.
        config_key = ori_var.op.name
        if tf_major == "1":
            # Drop the last path component when it starts with a known prefix.
            pieces = config_key.split("/")
            for prefix in update_op_scopes:
                if pieces[-1].startswith(prefix):
                    config_key = "/".join(pieces[:-1])
                    break
        elif tf_major == "2":
            # Drop the leading "<prefix>/" and then the last path component.
            for prefix in update_op_scopes:
                if config_key.startswith(prefix):
                    remainder = config_key[len(prefix) + 1:]
                    config_key = "/".join(remainder.split("/")[:-1])
                    break
        else:
            raise ValueError("Unknow version of tensorflow!!")
        pc = partition_config[config_key]

        info = {
            "name": ori_var.op.name,
            "shape": ori_var.shape.as_list(),
            "dtype": ori_var.dtype,
            "var_list": [new_by_op_name[split_name] for split_name in split_names],
            "partitions": pc._partition_list,
        }
        result[ori_var.name] = info

        # NOTE: strong assumption — the offsets of the partitioned variables
        # in the optimizer follow the naming order.
        split_vars = info["var_list"]
        if any(var._get_save_slice_info() is None for var in split_vars):
            # At least one split variable lacks SaveSliceInfo: (re)assign it
            # to all of them, pairing name-sorted variables with slices.
            split_vars.sort(key=lambda var: var.name)
            slice_dim, num_slices = vs._get_slice_dim_and_num_slices(pc._partition_list)
            slices = vs._iter_slices(ori_var.shape.as_list(), num_slices, slice_dim)
            for index, (var_offset, var_shape) in enumerate(slices):
                slice_info = Variable.SaveSliceInfo(
                    ori_var.name, ori_var.shape.as_list(), var_offset, var_shape)
                split_vars[index]._set_save_slice_info(slice_info)
    return result
def test_alias_tensors(self):
    """`misc.alias_tensors` returns a new object for the constant tensor only.

    The variable, the string and the list must come back as the very same
    objects; the aliased constant must still evaluate to its original value.
    """
    tensor_in = constant(1)
    variable_in = Variable(2)
    string_in = 'a'
    list_in = [1, 2, 3]

    tensor_out, variable_out, string_out, list_out = misc.alias_tensors(
        tensor_in, variable_in, string_in, list_in)

    self.assertIsNot(tensor_out, tensor_in)
    self.assertIs(variable_out, variable_in)
    self.assertIs(string_out, string_in)
    self.assertIs(list_out, list_in)

    with self.cached_session() as sess:
        self.assertEqual(1, sess.run(tensor_out))
def _embedding_lookup_for_sparse_tensor(
    inp: sparse_tensor.SparseTensor,
    weight: Optional[sparse_tensor.SparseTensor],
    table: tf_variables.Variable,
    feature: tpu_embedding_v2_utils.FeatureConfig) -> ops.Tensor:
    """Look up embeddings for one sparse input according to its feature config.

    Two paths exist:
      * Sequence path (`feature.output_shape` is falsy and
        `feature.max_sequence_length > 0`): the input is sliced to
        `[batch, max_sequence_length]`, rows are gathered from the table and
        scattered into a dense `[batch, max_sequence_length, dim]` tensor.
        `weight` is not used on this path.
      * Combiner path: delegates to `embedding_lookup_sparse_v2` or, when
        validation is requested or the input rank is unknown / above 2, to
        `safe_embedding_lookup_sparse_v2`.

    Args:
      inp: a single SparseTensor input.
      weight: None or SparseTensor which has the same shape of the input.
      table: a table variable.
      feature: a feature config.

    Returns:
      Embedding lookup result.
    """
    is_sequence_feature = (
        not feature.output_shape and feature.max_sequence_length > 0)

    if is_sequence_feature:
        batch_size = math_ops.cast(array_ops.shape(inp)[0], dtype=dtypes.int64)
        sparse_shape = array_ops.stack(
            [batch_size, feature.max_sequence_length], axis=0)
        # TPU Embedding truncates sequences to max_sequence_length, and if we
        # don't truncate, scatter_nd will error out if the index was out of
        # bounds.
        truncated_inp = sparse_ops.sparse_slice(
            inp, start=[0, 0], size=sparse_shape)
        dense_output_shape = array_ops.stack(
            [batch_size, feature.max_sequence_length, feature.table.dim],
            axis=0)
        gathered_rows = array_ops.gather(
            table.read_value(), truncated_inp.values)
        return array_ops.scatter_nd(
            truncated_inp.indices, gathered_rows, dense_output_shape)

    inp_rank = inp.dense_shape.get_shape()[0]
    # The non-"safe" lookup is used only when validation is off and the
    # input rank is known to be at most 2.
    can_skip_validation = (
        not feature.validate_weights_and_indices
        and inp_rank is not None
        and inp_rank <= 2)
    if not can_skip_validation:
        return embedding_ops.safe_embedding_lookup_sparse_v2(
            table, inp, sparse_weights=weight, combiner=feature.table.combiner)
    return embedding_ops.embedding_lookup_sparse_v2(
        table, inp, sp_weights=weight, combiner=feature.table.combiner)
def f():
    """Create a float32 zeros `Variable` of shape [32, 100], then drop the local reference."""
    shape = [32, 100]
    inputs = Variable(array_ops.zeros(shape, dtypes.float32))
    del inputs
def testIntermediateLookupGrad(self):
    """
    Test the gradient of a standard lookup somewhere in the middle of a stack
    recurrence.
    """
    batch_size = 2
    model_dim = 5
    embedding_dim = 5
    num_timesteps = 5
    num_tokens = (num_timesteps + 1) / 2

    # The six populated rows shared by the stack and buffer fixtures; every
    # row repeats a single value `model_dim` times.
    populated_rows = [[value] * model_dim
                      for value in (-1., 1., -2., 2., -3., 3.)]
    grad_shape = (batch_size, model_dim)

    with self.test_session(use_gpu=self.use_gpu) as sess:
        # Example 1: S S R S
        # Example 2: S S S R
        #                  ^
        # we are running lookup at the above timestep
        stack = Variable(populated_rows + [[0.] * model_dim for _ in range(4)])
        buffer = Variable(populated_rows)
        queue = Variable([2., 0., 0., 1., 0., 2., 0., 0., 0., 0.])
        cursors = Variable([0., 2.])
        buffer_cursors = Variable([2., 3.])

        fixtures = [stack, buffer, queue, cursors, buffer_cursors]
        sess.run(initialize_variables(fixtures))

        stack_val = stack.eval()
        buffer_val = buffer.eval()

        lookup = ts.thin_stack_lookup(stack, buffer, queue, cursors,
                                      buffer_cursors, timestep=3)

        #### GRADIENT
        stack1_grad = tf.random_uniform(grad_shape)
        stack2_grad = tf.random_uniform(grad_shape)
        buf_top_grad = tf.random_uniform(grad_shape)
        in_grads = (stack1_grad, stack2_grad, buf_top_grad, None)

        # HACK: Zero out stack and buffer before invoking this op.
        # In a real / full bprop, things would have been zeroed out
        # at the start of the bprop algorithm.
        zero_stack = tf.assign(stack, stack * 0.)
        zero_buffer = tf.assign(buffer, buffer * 0.)

        # Enforce computation order: lookup, then zero out, then grad
        with tf.control_dependencies(lookup + (zero_stack, zero_buffer)):
            out_grads = ts._thin_stack_lookup_gradient(lookup[0].op, in_grads)[:2]

        fetch = out_grads + (stack1_grad, stack2_grad, buf_top_grad)
        (grad_stack, grad_buffer,
         stack1_grad, stack2_grad, buf_top_grad) = sess.run(fetch)

        grad_stack_expected = np.zeros_like(stack_val)
def __init__(self, n_in, n_rec, tau=20., thr=0.03, dt=1., n_refractory=0, dtype=tf.float32, n_delay=1,
             rewiring_connectivity=-1, in_neuron_sign=None, rec_neuron_sign=None, dampening_factor=0.3,
             injected_noise_current=0., V0=1.):
    """
    Tensorflow cell object that simulates a LIF neuron with an approximation of the spike derivatives.

    :param n_in: number of input neurons
    :param n_rec: number of recurrent neurons
    :param tau: membrane time constant; a scalar is expanded to one value per recurrent neuron
    :param thr: threshold voltage; a scalar is expanded to one value per recurrent neuron
    :param dt: time step of the simulation
    :param n_refractory: number of refractory time steps
    :param dtype: data type of the cell tensors
    :param n_delay: number of synaptic delay, the delay range goes from 1 to n_delay time steps
    :param rewiring_connectivity: when strictly between 0 and 1, weights are built by `weight_sampler`
        with this value; otherwise plain randomly initialised weight variables are created
    :param in_neuron_sign: passed to `weight_sampler` as `neuron_sign` for the input weights
    :param rec_neuron_sign: passed to `weight_sampler` as `neuron_sign` for the recurrent weights
    :param dampening_factor: stored on the instance as `dampening_factor`
    :param injected_noise_current: stored on the instance as `injected_noise_current`
    :param V0: factor multiplied into both the input and the recurrent weight values
    :raises NotImplementedError: if a neuron sign is given while rewiring is not active
    """
    # Expand scalar time constants / thresholds to one entry per recurrent neuron.
    if np.isscalar(tau):
        tau = tf.ones(n_rec, dtype=dtype) * np.mean(tau)
    if np.isscalar(thr):
        thr = tf.ones(n_rec, dtype=dtype) * np.mean(thr)
    tau = tf.cast(tau, dtype=dtype)
    dt = tf.cast(dt, dtype=dtype)

    # Plain configuration values.
    self.dampening_factor = dampening_factor
    self.n_delay = n_delay
    self.n_refractory = n_refractory
    self.dt = dt
    self.n_in = n_in
    self.n_rec = n_rec
    self.data_type = dtype
    self._num_units = self.n_rec

    # Non-trainable neuron parameters and the per-step decay factor exp(-dt / tau).
    self.tau = tf.Variable(tau, dtype=dtype, name="Tau", trainable=False)
    self._decay = tf.exp(-dt / tau)
    self.thr = tf.Variable(thr, dtype=dtype, name="Threshold", trainable=False)

    self.V0 = V0
    self.injected_noise_current = injected_noise_current
    self.rewiring_connectivity = rewiring_connectivity
    self.in_neuron_sign = in_neuron_sign
    self.rec_neuron_sign = rec_neuron_sign

    uses_rewiring = 0 < rewiring_connectivity < 1

    with tf.variable_scope('InputWeights'):
        if uses_rewiring:
            self.w_in_val, self.w_in_sign, self.w_in_var, _ = weight_sampler(
                n_in, n_rec, rewiring_connectivity, neuron_sign=in_neuron_sign)
        else:
            initial_w_in = rd.randn(n_in, n_rec) / np.sqrt(n_in)
            self.w_in_var = tf.Variable(initial_w_in, dtype=dtype, name="InputWeight")
            self.w_in_val = self.w_in_var

        self.w_in_val = self.V0 * self.w_in_val

        # One random integer delay (drawn with rd.randint(n_delay)) per input connection.
        in_delays = rd.randint(self.n_delay, size=n_in * n_rec).reshape(n_in, n_rec)
        self.w_in_delay = tf.Variable(in_delays, dtype=tf.int64, name="InDelays", trainable=False)
        self.W_in = weight_matrix_with_delay_dimension(self.w_in_val, self.w_in_delay, self.n_delay)

    with tf.variable_scope('RecWeights'):
        if uses_rewiring:
            self.w_rec_val, self.w_rec_sign, self.w_rec_var, _ = weight_sampler(
                n_rec, n_rec, rewiring_connectivity, neuron_sign=rec_neuron_sign)
        else:
            if rec_neuron_sign is not None or in_neuron_sign is not None:
                raise NotImplementedError('Neuron sign requested but this is only implemented with rewiring')
            initial_w_rec = rd.randn(n_rec, n_rec) / np.sqrt(n_rec)
            self.w_rec_var = Variable(initial_w_rec, dtype=dtype, name='RecurrentWeight')
            self.w_rec_val = self.w_rec_var

        # Boolean mask that is True exactly on the diagonal (self-connections).
        recurrent_disconnect_mask = np.eye(n_rec, dtype=bool)

        self.w_rec_val = self.w_rec_val * self.V0
        # Disconnect autapses: zero every weight from a neuron onto itself.
        self.w_rec_val = tf.where(recurrent_disconnect_mask,
                                  tf.zeros_like(self.w_rec_val),
                                  self.w_rec_val)

        # One random integer delay (drawn with rd.randint(n_delay)) per recurrent connection.
        rec_delays = rd.randint(self.n_delay, size=n_rec * n_rec).reshape(n_rec, n_rec)
        self.w_rec_delay = tf.Variable(rec_delays, dtype=tf.int64, name="RecDelays", trainable=False)
        self.W_rec = weight_matrix_with_delay_dimension(self.w_rec_val, self.w_rec_delay, self.n_delay)
def testIntermediateUpdate(self):
    """Test a standard update somewhere in the middle of a stack recurrence."""
    batch_size = 2
    model_dim = 5
    embedding_dim = 5
    num_timesteps = 5
    num_tokens = (num_timesteps + 1) / 2

    # The six populated rows shared by the stack and buffer fixtures; every
    # row repeats a single value `model_dim` times.
    populated_rows = [[value] * model_dim
                      for value in (-1., 1., -2., 2., -3., 3.)]

    with self.test_session(use_gpu=self.use_gpu) as sess:
        # Example 1: S S R S
        # Example 2: S S S R
        #                  ^
        # we are running lookup at the above timestep
        stack = Variable(populated_rows + [[0.] * model_dim for _ in range(4)])
        buffer = Variable(populated_rows)
        queue = Variable([2., 0., 0., 1., 0., 2., 0., 0., 0., 0.])
        cursors = Variable([0., 2.])
        buffer_cursors = constant_op.constant([2., 3.])
        t = 3

        sess.run(initialize_variables([stack, buffer, queue, cursors]))

        stack_val = stack.eval()
        buffer_val = buffer.eval()

        # Example 1 shifts the next buffer element; example 2 reduces the
        # top two stack elements (their sum stands in for the composition).
        shifted_rows = np.array([buffer_val[4], buffer_val[5]])
        reduced_rows = np.array([stack_val[4] + stack_val[0],
                                 stack_val[5] + stack_val[3]])
        shift_in = constant_op.constant(shifted_rows)
        reduce_in = constant_op.constant(reduced_rows)
        transitions = tf.expand_dims(constant_op.constant([0., 1.]), 1)
        input_val = transitions * reduce_in + (1. - transitions) * shift_in

        ret = ts.thin_stack_update(input_val, transitions,
                                   stack, queue, cursors, buffer_cursors, t)
        stack_next, queue_next, cursors_next, buffer_cursors_next = sess.run(ret)

    stack_expected = np.copy(stack_val)
    stack_expected[6] = buffer_val[4]
    stack_expected[7] = stack_val[5] + stack_val[3]

    queue_expected = np.array([2., 0.,
                               3., 3.,
                               0., 2.,  # NB: we didn't erase this, but it's okay
                               0., 0., 0., 0.])
    cursors_expected = np.array([1., 1.])
    buffer_cursors_expected = np.array([3., 3.])

    expectations = (
        (stack_next, stack_expected),
        (queue_next, queue_expected),
        (cursors_next, cursors_expected),
        (buffer_cursors_next, buffer_cursors_expected),
    )
    for actual, expected in expectations:
        self.assertAllEqual(actual, expected)