def batch_fht(input):
    """Fast Hadamard transform over the last dimension of `input`.

    The last dimension must be a power of 2.
    """

    def log2n(x):
        # Return log2(x) if x is a power of 2, otherwise -1.
        i = 0
        while True:
            if x & 1:
                return i if x == 1 else -1
            x >>= 1
            i += 1

    in_shape = input.get_shape().as_list()
    lg2size = log2n(in_shape[-1])
    if lg2size < 0:
        raise ValueError(
            'batch_fht(): The last dimension of input must be a power of 2')
    elif lg2size == 0:
        return input

    idx = [slice(0, i) for i in in_shape[:-1]]
    output = input
    for i in range(lg2size):
        l, r = 2 ** (lg2size - i - 1), 2 ** i
        mid_shape = in_shape[:-1] + [l, 2, r]
        output = tf.reshape(output, mid_shape)
        idx_u = idx + [slice(0, l), slice(0, 1), slice(0, r)]
        idx_v = idx + [slice(0, l), slice(1, 2), slice(0, r)]
        u, v = output[tuple(idx_u)], output[tuple(idx_v)]
        # Butterfly step: the axis of size 2 is replaced by (u + v, u - v).
        output = tf.concat(len(mid_shape) - 2, [u + v, u - v])
    return tf.reshape(output, in_shape)
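# A minimal usage sketch for batch_fht (the 4x8 shapes are made up; assumes
# the TF 0.x graph API used throughout these snippets):
import numpy as np

x_fht = tf.placeholder(tf.float32, [4, 8])  # last dim 8 = 2**3, a power of 2
y_fht = batch_fht(x_fht)
with tf.Session() as sess:
    # Feeding standard basis vectors recovers rows of the 8x8 Hadamard matrix.
    print(sess.run(y_fht, feed_dict={x_fht: np.eye(8, dtype=np.float32)[:4]}))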
def __call__(self, inputs, state, scope=None):
    zero_initer = tf.constant_initializer(0.)
    with tf.variable_scope(scope or type(self).__name__):
        # Two real matrix multiplications convert the real-valued inputs to
        # the complex state size and the complex state back to real outputs;
        # this could be further adapted for LSTM configurations.
        mat_in = tf.get_variable('W_in', [self.input_size, self.state_size * 2])
        mat_out = tf.get_variable('W_out', [self.state_size * 2, self.output_size])

        in_proj = tf.matmul(inputs, mat_in)
        in_proj_c = tf.complex(in_proj[:, :self.state_size],
                               in_proj[:, self.state_size:])
        out_state = modrelu_c(
            in_proj_c + ulinear_c(state, transform=self.transform),
            tf.get_variable(name='B',
                            dtype=tf.float32,
                            shape=[self.state_size],
                            initializer=zero_initer))

        out_bias = tf.get_variable(name='B_out',
                                   dtype=tf.float32,
                                   shape=[self.output_size],
                                   initializer=zero_initer)
        out = tf.matmul(
            tf.concat(1, [tf.real(out_state), tf.imag(out_state)]),
            mat_out) + out_bias
        return out, out_state
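# modrelu_c is defined elsewhere in this repo. A minimal sketch of the
# standard complex modReLU nonlinearity (ReLU applied to the modulus with a
# learned bias, phase preserved); the epsilon guard is an assumption to
# avoid division by zero, not necessarily the repo's exact implementation:
def modrelu_c_sketch(z, bias, eps=1e-5):
    modulus = tf.complex_abs(z)                           # |z|, real-valued
    scale = tf.nn.relu(modulus + bias) / (modulus + eps)  # relu(|z| + b) / |z|
    return tf.complex(tf.real(z) * scale, tf.imag(z) * scale)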
def __call__(self, inputs, state, timestep=0, scope=None):
    """Long short-term memory cell (LSTM) with multiplicative integration."""
    with tf.device("/gpu:" + str(self._gpu_for_layer)):
        with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            h, c = tf.split(1, 2, state)

            concat = multiplicative_integration([inputs, h], self._num_units * 4, 0.0)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(1, 4, concat)

            if self.use_recurrent_dropout and self.is_training:
                input_contribution = tf.nn.dropout(tf.tanh(j),
                                                   self.recurrent_dropout_factor)
            else:
                input_contribution = tf.tanh(j)

            new_c = (c * tf.sigmoid(f + self._forget_bias)
                     + tf.sigmoid(i) * input_contribution)
            new_h = tf.tanh(new_c) * tf.sigmoid(o)

            return new_h, tf.concat(1, [new_h, new_c])  # purposely reversed: h first, then c
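# multiplicative_integration is defined elsewhere in this repo. A minimal
# sketch of the general form from Wu et al. (2016), "On Multiplicative
# Integration with Recurrent Neural Networks", using the linear() helper
# defined below; the initializer values are assumptions, not the repo's
# exact implementation:
def multiplicative_integration_sketch(list_of_inputs, output_size, bias_start=0.0):
    # MI: alpha * (Wx) * (Uh) + beta1 * (Wx) + beta2 * (Uh) + bias
    with tf.variable_scope('multiplicative_integration'):
        Wx = linear([list_of_inputs[0]], output_size, False, scope='Wx')
        Uh = linear([list_of_inputs[1]], output_size, False, scope='Uh')
        alpha = tf.get_variable('mi_alpha', [output_size],
                                initializer=tf.constant_initializer(1.0))
        beta1 = tf.get_variable('mi_beta1', [output_size],
                                initializer=tf.constant_initializer(0.5))
        beta2 = tf.get_variable('mi_beta2', [output_size],
                                initializer=tf.constant_initializer(0.5))
        bias = tf.get_variable('mi_bias', [output_size],
                               initializer=tf.constant_initializer(bias_start))
        return alpha * Wx * Uh + beta1 * Wx + beta2 * Uh + bias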
def linear(args, output_size, bias, bias_start=0.0, use_l2_loss=False,
           use_weight_normalization=use_weight_normalization_default,
           scope=None, timestep=-1, weight_initializer=None,
           orthogonal_scale_factor=1.1):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of W[i].
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
      ValueError: if some of the arguments has unspecified or wrong shape.
    """
    # assert args  # was raising an error in newer TensorFlow versions
    if not isinstance(args, (list, tuple)):
        args = [args]

    if len(args) > 1 and use_weight_normalization:
        raise ValueError(
            'weight normalization is not supported with multiple inputs '
            'because the euclidean norm would be incorrect; consider using '
            'multiplicative integration instead')

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        else:
            total_arg_size += shape[1]

    if use_l2_loss:
        l_regularizer = tf.contrib.layers.l2_regularizer(1e-5)
    else:
        l_regularizer = None

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable(
            "Matrix", [total_arg_size, output_size],
            initializer=tf.uniform_unit_scaling_initializer(),
            regularizer=l_regularizer)
        if use_weight_normalization:
            matrix = weight_normalization(matrix, timestep=timestep)

        if len(args) == 1:
            res = tf.matmul(args[0], matrix)
        else:
            res = tf.matmul(tf.concat(1, args), matrix)
        if not bias:
            return res
        bias_term = tf.get_variable(
            "Bias", [output_size],
            initializer=tf.constant_initializer(bias_start),
            regularizer=l_regularizer)
    return res + bias_term
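# Hypothetical usage of linear(): concatenate two [batch, n] tensors and
# project them to [batch, 64] (all shapes and names here are made up):
x_in = tf.placeholder(tf.float32, [32, 100])
h_in = tf.placeholder(tf.float32, [32, 64])
with tf.variable_scope('linear_example'):
    y_out = linear([x_in, h_in], 64, bias=True)  # y_out: [32, 64]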
def __call__(self, inputs, state, timestep=0, scope=None):
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        hidden_state_plus_c_list = tf.split(1, self.num_memory_arrays + 1, state)

        h = hidden_state_plus_c_list[0]
        c_list = hidden_state_plus_c_list[1:]

        # One very large matrix multiplication to speed up the procedure --
        # the variables will be split out later.
        if self.use_multiplicative_integration:
            concat = multiplicative_integration(
                [inputs, h], self._num_units * 4 * self.num_memory_arrays, 0.0)
        else:
            concat = linear([inputs, h],
                            self._num_units * 4 * self.num_memory_arrays, True)

        if self.use_layer_normalization:
            concat = layer_norm(concat,
                                num_variables_in_tensor=4 * self.num_memory_arrays)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate --
        # comes in sets of four, one set per memory array.
        all_vars_list = tf.split(1, 4 * self.num_memory_arrays, concat)

        # Memory array loop.
        new_c_list, new_h_list = [], []
        for array_counter in xrange(self.num_memory_arrays):
            i = all_vars_list[0 + array_counter * 4]
            j = all_vars_list[1 + array_counter * 4]
            f = all_vars_list[2 + array_counter * 4]
            o = all_vars_list[3 + array_counter * 4]

            if self.use_recurrent_dropout and self.is_training:
                input_contribution = tf.nn.dropout(tf.tanh(j),
                                                   self.recurrent_dropout_factor)
            else:
                input_contribution = tf.tanh(j)

            new_c_list.append(c_list[array_counter] * tf.sigmoid(f + self._forget_bias)
                              + tf.sigmoid(i) * input_contribution)

            if self.use_layer_normalization:
                new_c = layer_norm(new_c_list[-1])
            else:
                new_c = new_c_list[-1]

            new_h_list.append(tf.tanh(new_c) * tf.sigmoid(o))

        # Sum all new_h components -- could instead take a mean, but
        # investigate that later.
        new_h = tf.add_n(new_h_list)

    return new_h, tf.concat(1, [new_h] + new_c_list)  # purposely reversed: h first
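# Hypothetical construction of this memory-array cell, mirroring the
# commented-out line in the PTB-style model below (the size 256 is made up):
cell = rnn_cell_modern.LSTMCell_MemoryArray(256, num_memory_arrays=2,
                                            use_multiplicative_integration=True,
                                            use_recurrent_dropout=False)
# With num_memory_arrays = 2 the packed state is [batch, 3 * 256], laid out
# as [h | c_0 | c_1] and split by tf.split(1, num_memory_arrays + 1, state).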
def layer_norm(input_tensor, num_variables_in_tensor=1, initial_bias_value=0.0,
               scope="layer_norm"):
    with tf.variable_scope(scope):
        # For clarification of shapes:
        #   input_tensor = [batch_size, num_neurons]
        #   mean         = [batch_size]
        #   variance     = [batch_size]
        #   alpha        = [num_neurons]
        #   bias         = [num_neurons]
        #   output       = [batch_size, num_neurons]
        input_tensor_shape_list = input_tensor.get_shape().as_list()

        num_neurons = input_tensor_shape_list[1] / num_variables_in_tensor

        alpha = tf.get_variable('layer_norm_alpha',
                                [num_neurons * num_variables_in_tensor],
                                initializer=tf.constant_initializer(1.0))
        bias = tf.get_variable('layer_norm_bias',
                               [num_neurons * num_variables_in_tensor],
                               initializer=tf.constant_initializer(initial_bias_value))

        if num_variables_in_tensor == 1:
            input_tensor_list = [input_tensor]
            alpha_list = [alpha]
            bias_list = [bias]
        else:
            input_tensor_list = tf.split(1, num_variables_in_tensor, input_tensor)
            alpha_list = tf.split(0, num_variables_in_tensor, alpha)
            bias_list = tf.split(0, num_variables_in_tensor, bias)

        list_of_layer_normed_results = []
        for counter in xrange(num_variables_in_tensor):
            mean, variance = moments_for_layer_norm(
                input_tensor_list[counter], axes=[1],
                name="moments_loopnum_" + str(counter) + scope)  # average across layer
            # Use bias_list[counter] (not bias[counter]) so that each
            # sub-tensor gets its own bias slice when num_variables_in_tensor > 1.
            output = ((alpha_list[counter] * (input_tensor_list[counter] - mean))
                      / variance + bias_list[counter])
            list_of_layer_normed_results.append(output)

        if num_variables_in_tensor == 1:
            return list_of_layer_normed_results[0]
        else:
            return tf.concat(1, list_of_layer_normed_results)
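# moments_for_layer_norm is defined elsewhere in this repo. Since layer_norm
# above divides by `variance` directly, the helper presumably returns the
# standard deviation under that name. A minimal sketch under that assumption
# (the epsilon value is a guess):
def moments_for_layer_norm_sketch(x, axes=[1], name=None, epsilon=0.001):
    # Per-example mean and std over `axes`, keeping dims for broadcasting.
    mean = tf.reduce_mean(x, axes, keep_dims=True)
    std = tf.sqrt(tf.reduce_mean(tf.square(x - mean), axes, keep_dims=True)
                  + epsilon)
    return mean, std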
def __call__(self, inputs, state, timestep=0, scope=None):
    """Long short-term memory cell (LSTM).

    The idea with iteration would be to run different batch norm mean and
    variance statistics on timesteps greater than 10.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        h, c = tf.split(1, 2, state)

        # Note that bias is set to 0 because the layer norm bias is added later.
        with tf.variable_scope('inputs_weight_matrix'):
            inputs_concat = linear([inputs], 4 * self._num_units, False)
            inputs_concat = layer_norm(inputs_concat, num_variables_in_tensor=4,
                                       scope="inputs_concat_layer_norm")

        with tf.variable_scope('state_weight_matrix'):
            h_concat = linear([h], 4 * self._num_units, False)
            h_concat = layer_norm(h_concat, num_variables_in_tensor=4,
                                  scope="h_concat_layer_norm")

        i, j, f, o = tf.split(
            1, 4,
            multiplicative_integration([inputs_concat, h_concat],
                                       4 * self._num_units, 0.0,
                                       weights_already_calculated=True))

        new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * tf.tanh(j)

        # Apply layer norm to the hidden state transition.
        with tf.variable_scope('layer_norm_hidden_state'):
            new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

        return new_h, tf.concat(1, [new_h, new_c])  # purposely reversed: h first
x = tf.expand_dims(labels, -1)
print("expand_dims adds one dimension to the tensor; with axis -1 the new "
      "dimension is appended last and the existing dimensions are unchanged",
      sess.run(x))

"""
tf.pack(values, axis=0, name="pack")
Packs a list of rank-R tensors into one rank-(R+1) tensor: the list is
combined along the `axis` dimension, adding a new dimension.
"""
# x = [1, 4]
# y = [2, 5]
# z = [3, 6]
# a = tf.pack([x, y, z])
# b = tf.pack([x, y, z], axis=1)
#
# print(sess.run(a))
# print(sess.run(b))

"""
tf.concat(values, axis, name="concat")
Concatenates tensors along one dimension. Similar in spirit to pack above,
except that concat joins along an existing dimension instead of adding one.
"""
t1 = [[1, 2, 3], [4, 5, 6]]
t2 = [[7, 8, 9], [10, 11, 12]]
print("tf.concat joins the tensors along the given dimension",
      sess.run(tf.concat([t1, t2], 0)))
print("tf.concat joins the tensors along the given dimension",
      sess.run(tf.concat([t1, t2], 1)))

"""
tf.sparse_to_dense
Converts a sparse representation into a dense tensor. Defined as:
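# The snippet above cuts off at the definition. For reference, a small
# self-contained example of tf.sparse_to_dense in the same tutorial style
# (the indices and values here are made up):
dense = tf.sparse_to_dense(sparse_indices=[[0, 0], [2, 3]],
                           output_shape=[3, 4],
                           sparse_values=1.0,
                           default_value=0.0)
print("tf.sparse_to_dense scatters the sparse values into a dense tensor",
      sess.run(dense))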
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=1.0, state_is_tuple=True)
    # rnn_cell = rnn_cell_modern.HighwayRNNCell(size)
    # rnn_cell = rnn_cell_modern.JZS1Cell(size)
    # rnn_cell = rnn_cell_mulint_modern.BasicRNNCell_MulInt(size)
    # rnn_cell = rnn_cell_mulint_modern.GRUCell_MulInt(size)
    # rnn_cell = rnn_cell_mulint_modern.BasicLSTMCell_MulInt(size)
    # rnn_cell = rnn_cell_mulint_modern.HighwayRNNCell_MulInt(size)
    # rnn_cell = rnn_cell_mulint_layernorm_modern.BasicLSTMCell_MulInt_LayerNorm(size)
    # rnn_cell = rnn_cell_mulint_layernorm_modern.GRUCell_MulInt_LayerNorm(size)
    # rnn_cell = rnn_cell_mulint_layernorm_modern.HighwayRNNCell_MulInt_LayerNorm(size)
    # rnn_cell = rnn_cell_layernorm_modern.BasicLSTMCell_LayerNorm(size)
    # rnn_cell = rnn_cell_layernorm_modern.GRUCell_LayerNorm(size)
    # rnn_cell = rnn_cell_layernorm_modern.HighwayRNNCell_LayerNorm(size)
    # rnn_cell = rnn_cell_modern.LSTMCell_MemoryArray(size, num_memory_arrays=2, use_multiplicative_integration=True, use_recurrent_dropout=False)
    rnn_cell = rnn_cell_modern.MGUCell(size,
                                       use_multiplicative_integration=True,
                                       use_recurrent_dropout=False)

    if is_training and config.keep_prob < 1:
        rnn_cell = tf.nn.rnn_cell.DropoutWrapper(
            rnn_cell, output_keep_prob=config.keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([rnn_cell] * config.num_layers,
                                       state_is_tuple=True)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # from tensorflow.models.rnn import rnn
    # inputs = [tf.squeeze(input_, [1])
    #           for input_ in tf.split(1, num_steps, inputs)]
    # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state)
    outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            # Slice out the [batch, size] input for this timestep.
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)

    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    softmax_w = tf.transpose(embedding)  # weight tying
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = tf.nn.seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps])])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)

    # optimizer = tf.train.GradientDescentOptimizer(self.lr)
    optimizer = tf.train.AdamOptimizer(self.lr)

    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
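# A hypothetical driver for one training step. The class name PTBModel, the
# config object, and the numpy batches x, y (each [batch_size, num_steps])
# are assumptions; note the model above also leaves the learning rate at
# 0.0, so a real run would assign self._lr first.
m = PTBModel(is_training=True, config=config)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())  # TF 0.x-era initializer
    cost, _ = sess.run([m._cost, m._train_op],
                       feed_dict={m._input_data: x, m._targets: y})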