print("--------------")

# Layer 1: a fixed affine layer that standardizes the inputs.
# With Wi = diag(1 / Xstd) and bi = Xmeans @ Wi, `x @ Wi - bi` is
# (x - Xmeans) / Xstd per feature column.
# (assumes Xstd and Xmeans are 1-D per-feature vectors -- TODO confirm)
Wi = np.array(np.diag(1 / Xstd), dtype='float32')  # tf.constant([1/3/ts_std, 0,0, 0,1/3/dxl_std, 0, 0, 0, 1/3/lmois_std], shape=[3,3], dtype=tf.float32)
bi = np.matmul(Xmeans, Wi)  # tf.constant([ts_mean, dxl_mean, lmois_mean], shape=[1,3], dtype=tf.float32)
print(Wi)
print(bi)

#### TENSORFLOW GRAPH BUILDING STARTS HERE ###
xin = tf.matmul(tf.cast(my_data[:, X_ids].astype(float), tf.float32), Wi) - bi
# tf.one_hot only accepts integer indices. The labels are still parsed as
# float first (exactly as before) and then cast, so textual values such as
# "1.0" keep working.
yin = tf.one_hot(my_data[:, Y_id].astype(float).astype(np.int32),
                 depth=n_classes, dtype=tf.int32)

print("Scaled inputs:")
# One session, closed on exit, instead of two sessions that were never closed.
with tf.Session() as _preview_sess:
    print(_preview_sess.run(xin[0:5, :]))
    print(_preview_sess.run(yin[0:5, :]))
print("--------------")

# Same scaling and label encoding for the evaluation set.
xeval = tf.matmul(tf.cast(eval_data[:, X_ids].astype(float), tf.float32), Wi) - bi
yeval = tf.one_hot(eval_data[:, Y_id].astype(float).astype(np.int32),
                   depth=n_classes, dtype=tf.int32)
def __call__(self, tensor):
    """Apply this layer to `tensor`.

    Computes `tensor @ self._weights + self._bias` and passes the result
    through ReLU when `self._activate_relu` is truthy; otherwise the affine
    output is returned unchanged.
    """
    pre_activation = tf.matmul(tensor, self._weights) + self._bias
    if self._activate_relu:
        return tf.nn.relu(pre_activation)
    return pre_activation
def conv_capsule_mat(input_tensor,
                     input_activation,
                     input_dim,
                     output_dim,
                     layer_name,
                     num_routing=3,
                     num_in_atoms=3,
                     num_out_atoms=3,
                     stride=2,
                     kernel_size=5,
                     min_var=0.0005,
                     final_beta=1.0):
  """Convolutional Capsule layer with Pose Matrices.

  Extracts `kernel_size` x `kernel_size` patches (stride `stride`, VALID
  padding) from the input poses and activations, multiplies every pose patch
  by a learned kernel to obtain votes, and passes the votes to
  `update_conv_routing`.

  Args:
    input_tensor: rank-5 tensor of input poses; the last two axes are treated
      as height and width. (presumably laid out as
      [batch, input_dim, num_in_atoms**2, height, width] -- TODO confirm)
    input_activation: activations of the input capsules; reshaped here to
      [batch * input_dim, height, width, 1].
    input_dim: number of input capsule types.
    output_dim: number of output capsule types.
    layer_name: name of the variable scope the layer variables live in.
    num_routing: forwarded to `update_conv_routing`.
    num_in_atoms: side length of the input pose matrix.
    num_out_atoms: side length of the output pose matrix.
    stride: patch stride along height and width.
    kernel_size: patch size along height and width.
    min_var: forwarded to `update_conv_routing`.
    final_beta: forwarded to `update_conv_routing`.

  Returns:
    A tuple `(tf.sigmoid(out_activation), out_center)` built from the squeezed
    outputs of `update_conv_routing`.
  """
  # NOTE(review): the nesting of the name scopes below was reconstructed from
  # a whitespace-collapsed source; confirm against the original file.
  print('caps conv stride: {}'.format(stride))
  in_atom_sq = num_in_atoms * num_in_atoms
  with tf.variable_scope(layer_name):
    # Dynamic shape (for the batch size) and static shape (for height/width).
    input_shape = tf.shape(input_tensor)
    _, _, _, in_height, in_width = input_tensor.get_shape()
    # This Variable will hold the state of the weights for the layer
    kernel = utils.weight_variable(shape=[
        input_dim, kernel_size, kernel_size, num_in_atoms, output_dim * num_out_atoms
    ], stddev=0.3)
    # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3])
    activation_biases = utils.bias_variable(
        [1, 1, output_dim, 1, 1, 1, 1, 1], init_value=0.5, name='activation_biases')
    sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1], init_value=.5, name='sigma_biases')
    with tf.name_scope('conv'):
      print('convi;')
      # input_tensor: [x,128,8, c1,c2] -> [x*128,8, c1,c2]
      print(input_tensor.get_shape())
      # Fold batch, capsule type and pose entry into one leading axis so each
      # pose entry becomes a single-channel image for patch extraction.
      input_tensor_reshaped = tf.reshape(input_tensor, [
          input_shape[0] * input_dim * in_atom_sq, input_shape[3], input_shape[4], 1
      ])
      # Restore the static height/width that the dynamic reshape dropped.
      input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3], input_tensor.get_shape()[4], 1))
      input_act_reshaped = tf.reshape(input_activation, [
          input_shape[0] * input_dim, input_shape[3], input_shape[4], 1
      ])
      input_act_reshaped.set_shape((None, input_tensor.get_shape()[3], input_tensor.get_shape()[4], 1))
      print(input_tensor_reshaped.get_shape())
      # conv: [x*128,out*out_at, c3,c4]
      conv_patches = tf.extract_image_patches(
          images=input_tensor_reshaped,
          ksizes=[1, kernel_size, kernel_size, 1],
          strides=[1, stride, stride, 1],
          rates=[1, 1, 1, 1],
          padding='VALID',
      )
      act_patches = tf.extract_image_patches(
          images=input_act_reshaped,
          ksizes=[1, kernel_size, kernel_size, 1],
          strides=[1, stride, stride, 1],
          rates=[1, 1, 1, 1],
          padding='VALID',
      )
      # Output spatial size of a VALID patch extraction.
      o_height = (in_height - kernel_size) // stride + 1
      o_width = (in_width - kernel_size) // stride + 1
      patches = tf.reshape(conv_patches, (input_shape[0], input_dim, in_atom_sq, o_height, o_width, kernel_size, kernel_size))
      patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width, kernel_size, kernel_size))
      # Move (input_dim, kernel row, kernel col) to the front so they line up
      # with the leading axes of `kernel` in the batched matmul below.
      patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
      patch_split = tf.reshape(
          patch_trans,
          (input_dim, kernel_size, kernel_size, input_shape[0] * o_height * o_width * num_in_atoms, num_in_atoms))
      patch_split.set_shape(
          (input_dim, kernel_size, kernel_size, None, num_in_atoms))
      a_patches = tf.reshape(act_patches, (input_shape[0], input_dim, 1, 1, o_height, o_width, kernel_size, kernel_size))
      a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width, kernel_size, kernel_size))
      with tf.name_scope('input_act'):
        # Summarize the input activation summed over capsule types and the
        # kernel window.
        utils.activation_summary(
            tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(a_patches, axis=1), axis=-1), axis=-1))
    with tf.name_scope('Wx'):
      # Votes: every pose patch multiplied by the kernel slice of its
      # (input_dim, kernel row, kernel col) position.
      wx = tf.matmul(patch_split, kernel)
      wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size, input_shape[0], o_height, o_width, num_in_atoms * num_out_atoms, output_dim))
      wx.set_shape(
          (input_dim, kernel_size, kernel_size, None, o_height, o_width, num_in_atoms * num_out_atoms, output_dim))
      # Reorder to [batch, input_dim, output_dim, atoms, o_height, o_width,
      # kernel row, kernel col].
      wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
      utils.activation_summary(wx)
    with tf.name_scope('routing'):
      # Routing
      # logits: [x, 128, 10, c3, c4]
      logit_shape = [
          input_dim, output_dim, 1, o_height, o_width, kernel_size, kernel_size
      ]
      activation, center = update_conv_routing(
          wx=wx,
          input_activation=a_patches,
          activation_biases=activation_biases,
          sigma_biases=sigma_biases,
          logit_shape=logit_shape,
          num_out_atoms=num_out_atoms * num_out_atoms,
          input_dim=input_dim,
          num_routing=num_routing,
          output_dim=output_dim,
          min_var=min_var,
          final_beta=final_beta,
      )
      # activations: [x, 10, 8, c3, c4]
      # Drop the singleton axes returned by the routing step.
      out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7])
      out_center = tf.squeeze(center, axis=[1, 6, 7])
      with tf.name_scope('center'):
        utils.activation_summary(out_center)
    return tf.sigmoid(out_activation), out_center
# Training hyper-parameters.
batch_size = 128
num_steps = 1800
learning_rate = 0.01

# Wall-clock reference taken before the graph is built.
start = time.time()

# Inputs: 784 features per example and 10-way label vectors.
x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y")

# Model parameters, both initialised to zero.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Logits and their softmax.
y = tf.matmul(x, W) + b
sm = tf.nn.softmax(y, name="softmax")

# Mean softmax cross-entropy between the logits and the labels.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
loss = tf.reduce_mean(per_example_loss, name="loss")

# Plain gradient descent on the loss.
sgd = tf.train.GradientDescentOptimizer(learning_rate)
train_step = sgd.minimize(loss)

# Accuracy: fraction of rows whose arg-max prediction matches the label.
predicted_class = tf.argmax(sm, 1)
labelled_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                          name="accuracy")

saver = tf.train.Saver()
init = tf.global_variables_initializer()
def testRunSimpleNetworkoWithInfAndNaNWorks(self):
  """Runs a tiny graph that yields Inf and NaN and checks the debugger output.

  `u = x / y` contains a +Inf (2.0 / 0.0) and `v = z @ u` contains a NaN and
  a -Inf. The test verifies the fetched result, the health pills written to
  the single debugger events file, and the two numerics alerts reported by
  the debug data server (first `u`, then `v`).
  """
  with tf.Session() as sess:
    x_init_val = np.array([[2.0], [-1.0]])
    y_init_val = np.array([[0.0], [-0.25]])
    z_init_val = np.array([[0.0, 3.0], [-1.0, 0.0]])

    x_init = tf.constant(x_init_val, shape=[2, 1], name="x_init")
    x = tf.Variable(x_init, name="x")
    y_init = tf.constant(y_init_val, shape=[2, 1])
    y = tf.Variable(y_init, name="y")
    z_init = tf.constant(z_init_val, shape=[2, 2])
    z = tf.Variable(z_init, name="z")

    u = tf.div(x, y, name="u")  # Produces an Inf.
    v = tf.matmul(z, u, name="v")  # Produces NaN and Inf.

    sess.run(x.initializer)
    sess.run(y.initializer)
    sess.run(z.initializer)

    run_options = tf.RunOptions(output_partition_graphs=True)
    tf_debug.watch_graph(run_options,
                         sess.graph,
                         debug_ops=["DebugNumericSummary"],
                         debug_urls=[self._debug_url])

    result = sess.run(v, options=run_options)
    self.assertTrue(np.isnan(result[0, 0]))
    self.assertEqual(-np.inf, result[1, 0])

  # Debugger data is stored within a special directory within logdir.
  event_files = glob.glob(
      os.path.join(self._logdir, constants.DEBUGGER_DATA_DIRECTORY_NAME,
                   "events.debugger*"))
  self.assertEqual(1, len(event_files))

  self._check_health_pills_in_events_file(
      event_files[0], {
          "x:0:DebugNumericSummary": [x_init_val],
          "y:0:DebugNumericSummary": [y_init_val],
          "z:0:DebugNumericSummary": [z_init_val],
          "u:0:DebugNumericSummary": [x_init_val / y_init_val],
          "v:0:DebugNumericSummary":
              [np.matmul(z_init_val, x_init_val / y_init_val)],
      })

  report = self._debug_data_server.numerics_alert_report()
  self.assertEqual(2, len(report))

  # First alert: `u` holds exactly one +Inf.
  self.assertTrue(report[0].device_name.lower().endswith("cpu:0"))
  self.assertEqual("u:0", report[0].tensor_name)
  self.assertGreater(report[0].first_timestamp, 0)
  self.assertEqual(0, report[0].nan_event_count)
  self.assertEqual(0, report[0].neg_inf_event_count)
  self.assertEqual(1, report[0].pos_inf_event_count)

  # Second alert: `v` holds one NaN and one -Inf. The original re-checked
  # report[0] here, so the tensor name of the second alert was never verified.
  self.assertTrue(report[1].device_name.lower().endswith("cpu:0"))
  self.assertEqual("v:0", report[1].tensor_name)
  self.assertGreaterEqual(report[1].first_timestamp,
                          report[0].first_timestamp)
  self.assertEqual(1, report[1].nan_event_count)
  self.assertEqual(1, report[1].neg_inf_event_count)
  self.assertEqual(0, report[1].pos_inf_event_count)
# Width of each hidden layer.
layer_1_nodes = 50
layer_2_nodes = 165
layer_3_nodes = 50

# Defining the model.
# Input placeholder: any number of rows, `number_of_inputs` columns.
with tf.variable_scope('input'):
    X = tf.placeholder(tf.float32, shape=(None, number_of_inputs))

# Layer 1: number_of_inputs -> layer_1_nodes, ReLU, Glorot-normal weights,
# zero biases.
with tf.variable_scope('layer_1'):
    weights = tf.get_variable('weights1', shape=[number_of_inputs, layer_1_nodes], initializer=tf.initializers.glorot_normal())
    biases = tf.get_variable('biases1', shape=[layer_1_nodes], initializer=tf.zeros_initializer())
    layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases)

# Layer 2: layer_1_nodes -> layer_2_nodes, ReLU.
with tf.variable_scope('layer_2'):
    weights = tf.get_variable('weights2', shape=[layer_1_nodes, layer_2_nodes], initializer=tf.initializers.glorot_normal())
    biases = tf.get_variable(name='biases2', shape=[layer_2_nodes], initializer=tf.zeros_initializer())
    layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases)

# Layer 3: layer_2_nodes -> layer_3_nodes.
with tf.variable_scope('layer_3'):
    weights = tf.get_variable('weights3', shape=[layer_2_nodes, layer_3_nodes], initializer=tf.initializers.glorot_normal())
    biases = tf.get_variable(name='biases3',
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=16,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            dropout_prob=0.1):
  """Performs various post-processing on a word embedding tensor.

  Optionally adds token-type embeddings and learned position embeddings to
  `input_tensor`, then applies `layer_norm_and_dropout`.

  Args:
    input_tensor: float Tensor of shape [batch_size, seq_length,
      embedding_size].
    use_token_type: bool. Whether to add embeddings for `token_type_ids`.
    token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
      Must be specified if `use_token_type` is True.
    token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
    token_type_embedding_name: string. The name of the embedding table variable
      for token type ids.
    use_position_embeddings: bool. Whether to add position embeddings for the
      position of each token in the sequence.
    position_embedding_name: string. The name of the embedding table variable
      for positional embeddings.
    initializer_range: float. Range of the weight initialization.
    max_position_embeddings: int. Maximum sequence length that might ever be
      used with this model. This can be longer than the sequence length of
      input_tensor, but cannot be shorter.
    dropout_prob: float. Dropout probability applied to the final output tensor.

  Returns:
    float tensor with same shape as `input_tensor`.

  Raises:
    ValueError: One of the tensor shapes or input values is invalid.
  """
  input_shape = get_shape_list(input_tensor, expected_rank=3)
  batch_size = input_shape[0]
  seq_length = input_shape[1]
  width = input_shape[2]

  output = input_tensor

  if use_token_type:
    if token_type_ids is None:
      # The two literals are concatenated, so the first needs a trailing
      # space; without it the message read "...specified if`use_token_type`".
      raise ValueError("`token_type_ids` must be specified if "
                       "`use_token_type` is True.")
    token_type_table = tf.get_variable(
        name=token_type_embedding_name,
        shape=[token_type_vocab_size, width],
        initializer=create_initializer(initializer_range))
    # This vocab will be small so we always do one-hot here, since it is always
    # faster for a small vocabulary.
    flat_token_type_ids = tf.reshape(token_type_ids, [-1])
    one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size)
    token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
    token_type_embeddings = tf.reshape(token_type_embeddings,
                                       [batch_size, seq_length, width])
    output += token_type_embeddings

  if use_position_embeddings:
    assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
    with tf.control_dependencies([assert_op]):
      full_position_embeddings = tf.get_variable(
          name=position_embedding_name,
          shape=[max_position_embeddings, width],
          initializer=create_initializer(initializer_range))
      # Since the position embedding table is a learned variable, we create it
      # using a (long) sequence length `max_position_embeddings`. The actual
      # sequence length might be shorter than this, for faster training of
      # tasks that do not have long sequences.
      #
      # So `full_position_embeddings` is effectively an embedding table
      # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
      # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
      # perform a slice.
      position_embeddings = tf.slice(full_position_embeddings, [0, 0],
                                     [seq_length, -1])
      num_dims = len(output.shape.as_list())

      # Only the last two dimensions are relevant (`seq_length` and `width`), so
      # we broadcast among the first dimensions, which is typically just
      # the batch size.
      position_broadcast_shape = [1] * (num_dims - 2) + [seq_length, width]
      position_embeddings = tf.reshape(position_embeddings,
                                       position_broadcast_shape)
      output += position_embeddings

  output = layer_norm_and_dropout(output, dropout_prob)
  return output
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) # parameters learning_rate = 0.001 training_epochs = 15 batch_size = 100 # input place holders X = tf.placeholder(tf.float32, [None, 784]) Y = tf.placeholder(tf.float32, [None, 10]) W1 = tf.get_variable("W1", shape=[784, 512], initializer=tf.contrib.layers.xavier_initializer()) b1 = tf.Variable(tf.random_normal([512])) L1 = tf.nn.relu(tf.matmul(X, W1) + b1) W2 = tf.get_variable("W2", shape=[512, 512], initializer=tf.contrib.layers.xavier_initializer()) b2 = tf.Variable(tf.random_normal([512])) L2 = tf.nn.relu(tf.matmul(L1, W2) + b2) W3 = tf.get_variable("W3", shape=[512, 512], initializer=tf.contrib.layers.xavier_initializer()) b3 = tf.Variable(tf.random_normal([512])) L3 = tf.nn.relu(tf.matmul(L2, W3) + b3) W4 = tf.get_variable("W4", shape=[512, 512],
def outer(x, y):
    """Multiply `x` (with an axis inserted at position 1) by the transpose of
    `y` (with an axis inserted at position 1).

    For 1-D `x` of length n and 1-D `y` of length m this is the n x m outer
    product.
    """
    x_column = tf.expand_dims(x, 1)
    y_row = tf.transpose(tf.expand_dims(y, 1))
    return tf.matmul(x_column, y_row)
def __init__(self,
             linear_size,
             num_layers,
             residual,
             batch_norm,
             max_norm,
             batch_size,
             learning_rate,
             summaries_dir,
             predict_14=False,
             dtype=tf.float32):
  """Creates the linear + relu model

  Builds the placeholders, the stack of linear/ReLU layers, the mean squared
  error loss, the summaries, the Adam update op and the saver.

  Args
    linear_size: integer. number of units in each layer of the model
    num_layers: integer. number of bilinear blocks in the model
    residual: boolean. Whether to add residual connections
    batch_norm: boolean. Whether to use batch normalization
    max_norm: boolean. Whether to clip weights to a norm of 1
    batch_size: integer. The size of the batches used during training
    learning_rate: float. Learning rate to start with
    summaries_dir: String. Directory where to log progress
    predict_14: boolean. Whether to predict 14 instead of 17 joints
    dtype: the data type to use to store internal variables
  """

  # There are in total 17 joints in H3.6M and 16 in MPII (and therefore in stacked
  # hourglass detections). We settled with 16 joints in 2d just to make models
  # compatible (e.g. you can train on ground truth 2d and test on SH detections).
  # This does not seem to have an effect on prediction performance.
  self.HUMAN_2D_SIZE = 16 * 2

  # In 3d all the predictions are zero-centered around the root (hip) joint, so
  # we actually predict only 16 joints. The error is still computed over 17 joints,
  # because if one uses, e.g. Procrustes alignment, there is still error in the
  # hip to account for!
  # There is also an option to predict only 14 joints, which makes our results
  # directly comparable to those in https://arxiv.org/pdf/1611.09010.pdf
  self.HUMAN_3D_SIZE = 14 * 3 if predict_14 else 16 * 3

  self.input_size = self.HUMAN_2D_SIZE
  self.output_size = self.HUMAN_3D_SIZE

  self.isTraining = tf.placeholder(tf.bool, name="isTrainingflag")
  self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

  # Summary writers for train and test runs
  self.train_writer = tf.summary.FileWriter(
      os.path.join(summaries_dir, 'train'))
  self.test_writer = tf.summary.FileWriter(
      os.path.join(summaries_dir, 'test'))

  self.linear_size = linear_size
  self.batch_size = batch_size
  self.learning_rate = tf.Variable(float(learning_rate), trainable=False,
                                   dtype=dtype, name="learning_rate")
  self.global_step = tf.Variable(0, trainable=False, name="global_step")
  decay_steps = 100000  # empirical
  decay_rate = 0.96  # empirical
  # From here on self.learning_rate is the decayed tensor, not the variable.
  self.learning_rate = tf.train.exponential_decay(
      self.learning_rate, self.global_step, decay_steps, decay_rate)

  # === Transform the inputs ===
  with vs.variable_scope("inputs"):
    # in=2d poses, out=3d poses
    enc_in = tf.placeholder(dtype, shape=[None, self.input_size], name="enc_in")
    dec_out = tf.placeholder(dtype, shape=[None, self.output_size], name="dec_out")

    self.encoder_inputs = enc_in
    self.decoder_outputs = dec_out

  # === Create the linear + relu combos ===
  with vs.variable_scope("linear_model"):

    # === First layer, brings dimensionality up to linear_size ===
    w1 = tf.get_variable(name="w1", initializer=kaiming,
                         shape=[self.HUMAN_2D_SIZE, linear_size], dtype=dtype)
    b1 = tf.get_variable(name="b1", initializer=kaiming,
                         shape=[linear_size], dtype=dtype)
    w1 = tf.clip_by_norm(w1, 1) if max_norm else w1
    y3 = tf.matmul(enc_in, w1) + b1

    if batch_norm:
      y3 = tf.layers.batch_normalization(y3, training=self.isTraining,
                                         name="batch_normalization")
    y3 = tf.nn.relu(y3)
    y3 = tf.nn.dropout(y3, self.dropout_keep_prob)

    # === Create multiple bi-linear layers ===
    for idx in range(num_layers):
      y3 = self.two_linear(y3, linear_size, residual, self.dropout_keep_prob,
                           max_norm, batch_norm, dtype, idx)

    # === Last linear layer has HUMAN_3D_SIZE in output ===
    w4 = tf.get_variable(name="w4", initializer=kaiming,
                         shape=[linear_size, self.HUMAN_3D_SIZE], dtype=dtype)
    b4 = tf.get_variable(name="b4", initializer=kaiming,
                         shape=[self.HUMAN_3D_SIZE], dtype=dtype)
    w4 = tf.clip_by_norm(w4, 1) if max_norm else w4
    y = tf.matmul(y3, w4) + b4
    # === End linear model ===

  # Store the outputs here
  self.outputs = y
  self.loss = tf.reduce_mean(tf.square(y - dec_out))
  self.loss_summary = tf.summary.scalar('loss/loss', self.loss)

  # To keep track of the loss in mm
  self.err_mm = tf.placeholder(tf.float32, name="error_mm")
  self.err_mm_summary = tf.summary.scalar("loss/error_mm", self.err_mm)

  # Gradients and update operation for training the model.
  opt = tf.train.AdamOptimizer(self.learning_rate)
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

  # Run the collected UPDATE_OPS before every training step.
  with tf.control_dependencies(update_ops):

    # Update all the trainable parameters
    gradients = opt.compute_gradients(self.loss)
    # Identity test instead of `== None`.
    # NOTE(review): compute_gradients yields (gradient, variable) pairs, so an
    # entry is never None and this filter looks like a no-op -- confirm intent.
    self.gradients = [[] if i is None else i for i in gradients]
    self.updates = opt.apply_gradients(gradients, global_step=self.global_step)

  # Keep track of the learning rate
  self.learning_rate_summary = tf.summary.scalar(
      'learning_rate/learning_rate', self.learning_rate)

  # To save the model
  self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
def main():
    """Train a small CNN on the pickled CIFAR-10 data, save it and plot results.

    Builds a three-convolution + one fully connected layer graph, trains it
    with Adam on random contiguous mini-batches, evaluates on the test set
    every 100 iterations, saves the model as "my_model", prints per-class
    correct/total counts and shows accuracy, cost and first-layer filter plots.
    """
    # Reset Graph
    tf.reset_default_graph()

    # Load Data
    DATA_FILE = "cifar_10_tf_train_test.pkl"
    train_x, train_y, test_x, test_y = loadData(DATA_FILE)
    test_y_np = np.array(test_y)
    print("Train X size:\t", train_x.shape)
    print("Train Y size:\t", len(train_y))
    print("Test X size:\t", test_x.shape)
    print("Test Y size:\t", len(test_y))

    # Hyper Parameters
    batch_size = 100
    num_epochs = 3000
    learning_rate = .005

    # Convolution Layer1
    filter_size1 = 5
    num_filters1 = 32

    # Convolution Layer2
    filter_size2 = 5
    num_filters2 = 32

    # Convolution Layer3
    filter_size3 = 3
    num_filters3 = 64

    # Dimensions of Data
    img_size = 32
    img_depth = 3  # number of channels in the image (red,blue,green)
    num_classes = 10

    # Initializers
    xavier_init = tf.initializers.glorot_normal()
    zero_init = tf.zeros_initializer()

    # Input Variables
    input_img = tf.placeholder(dtype=tf.uint8,
                               shape=[None, img_size, img_size, img_depth],
                               name="input_img")
    y = tf.placeholder(dtype=tf.int64, shape=[None], name="labels")

    # Normalization
    # NOTE(review): convert_image_dtype already rescales uint8 input to
    # [0, 1], so the extra division by 255 shrinks the range again. Left
    # unchanged so the trained/saved graph keeps its behaviour -- confirm.
    x = tf.image.convert_image_dtype(input_img, dtype="float32")
    x = tf.math.divide(x, 255)
    mean = tf.math.reduce_mean(x, 0)  # mean over the batch axis
    x = tf.math.subtract(x, mean)
    y_true = tf.one_hot(y, num_classes, dtype="float32")

    # Filters,Weights, and Biases
    F1_shape = [filter_size1, filter_size1, img_depth, num_filters1]
    F1 = tf.get_variable(shape=F1_shape, dtype='float32',
                         initializer=xavier_init, name="filter1")
    F1_bias = tf.get_variable(shape=[num_filters1], dtype='float32',
                              initializer=zero_init, name="filter_bias1")

    F2_shape = [filter_size2, filter_size2, num_filters1, num_filters2]
    F2 = tf.get_variable(shape=F2_shape, dtype='float32',
                         initializer=xavier_init, name="filter2")
    F2_bias = tf.get_variable(shape=[num_filters2], dtype='float32',
                              initializer=zero_init, name="filter_bias2")

    F3_shape = [filter_size3, filter_size3, num_filters2, num_filters3]
    F3 = tf.get_variable(shape=F3_shape, dtype='float32',
                         initializer=xavier_init, name="filter3")
    F3_bias = tf.get_variable(shape=[num_filters3], dtype='float32',
                              initializer=zero_init, name="filter_bias3")

    # 576 = 3 * 3 * 64: spatial size 32 -> 28 -> 14 -> 10 -> 5 -> 3 through
    # the VALID convolutions and 2x2 poolings below.
    weights_fc = tf.get_variable(shape=[576, num_classes], dtype="float32",
                                 initializer=xavier_init, name="weightsfc")
    bias_fc = tf.get_variable(shape=[num_classes], dtype="float32",
                              initializer=zero_init, name="biasfc")

    # Forward Propagation
    conv_layer1 = tf.nn.leaky_relu(
        tf.nn.conv2d(x, filters=F1, strides=[1, 1, 1, 1], padding="VALID") + F1_bias)
    pool1 = tf.nn.pool(conv_layer1, window_shape=[2, 2], pooling_type="MAX",
                       strides=[2, 2], padding="VALID")
    conv_layer2 = tf.nn.leaky_relu(
        tf.nn.conv2d(pool1, filters=F2, strides=[1, 1, 1, 1], padding="VALID") + F2_bias)
    pool2 = tf.nn.pool(conv_layer2, window_shape=[2, 2], pooling_type="MAX",
                       strides=[2, 2], padding="VALID")
    conv_layer3 = tf.nn.leaky_relu(
        tf.nn.conv2d(pool2, filters=F3, strides=[1, 1, 1, 1], padding="VALID") + F3_bias)

    # Vectorize Final Convolution
    conv_vector = tf.layers.flatten(conv_layer3)
    print(conv_layer1.get_shape())
    print(conv_layer2.get_shape())
    print(conv_layer3.get_shape())
    print(conv_vector.get_shape())

    # Fully Connected Layer
    logits = tf.matmul(conv_vector, weights_fc) + bias_fc
    softmax_op = tf.nn.softmax(logits)
    predict_lbl = tf.argmax(softmax_op, axis=1, name='predict_lbl')

    # Cost Function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits, name=None)
    correct_prediction = tf.equal(predict_lbl, y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    cost = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Create the collection.
    tf.get_collection("validation_nodes")

    # Add stuff to the collection.
    tf.add_to_collection("validation_nodes", input_img)
    tf.add_to_collection("validation_nodes", predict_lbl)

    # start training
    saver = tf.train.Saver()

    # Plot Variables
    cost_list = []
    train_accuracy_list = []
    test_accuracy_list = []
    test_accuracy_cls = {}
    start_time = time.time()

    # Initialize the Graph
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        print("\n\n\n")
        sess.run(init)
        trained_set = set()
        # Largest valid start index of a full batch (loop invariant).
        indlimit = train_x.shape[0] - batch_size
        for e in range(num_epochs):
            time.sleep(.1)
            # Take a random contiguous slice, then shuffle inside the slice.
            index = random.randint(0, indlimit)
            trained_set.update(range(index, index + batch_size))
            x_batch = train_x[index: index + batch_size]
            y_batch = train_y[index: index + batch_size]
            permutation = np.random.permutation(len(y_batch))
            x_batch = x_batch[permutation, :]
            y_batch = np.asarray(y_batch)[permutation]
            batch_feed = {input_img: x_batch, y: y_batch}
            sess.run(optimizer, feed_dict=batch_feed)

            # Store Values for plots
            cost_list.append(sess.run(cost, feed_dict=batch_feed))
            train_accuracy_list.append(sess.run(accuracy, feed_dict=batch_feed))
            if e % 100 == 0:
                print("Iteration:\t", e)
                print("Index Start:\t", index)
                print("Len Trained Set:", len(trained_set))
                predict_test = sess.run(predict_lbl, feed_dict={input_img: test_x})
                # Fraction correct over the actual test-set size (was a
                # hard-coded division by 5000).
                test_accuracy = np.mean(predict_test == test_y_np)
                test_accuracy_list.append(test_accuracy)
                print("Test Accuracy:", test_accuracy)
                print()

        # saver.save() has to run inside the same tf.Session() as training.
        conv1_filters = sess.run(F1, feed_dict={input_img: x_batch})
        conv1_filter_images = ((conv1_filters + 0.1) * (1 / 0.3) * 255).astype('uint8')
        save_path = saver.save(sess, "my_model")

    # Per-class counts from the most recent test evaluation.
    for label, predicted in zip(test_y_np, predict_test):
        stats = test_accuracy_cls.setdefault(label, {"correct": 0, "total": 0})
        stats["total"] += 1
        if label == predicted:
            # The original incremented "total" a second time here, so
            # "correct" always stayed at 0.
            stats["correct"] += 1
    print(test_accuracy_cls)

    end_time = time.time()
    print("Time Ellapsed:", end_time - start_time)

    plt.plot(range(0, len(train_accuracy_list)), train_accuracy_list)
    plt.title('Total Training Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('%Accuracy')
    plt.show()

    plt.plot(range(0, len(cost_list)), cost_list)
    plt.title('Total Error/Cost')
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()

    plt.plot(range(0, len(test_accuracy_list)), test_accuracy_list)
    plt.title('Total Test Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('%Accuracy')
    plt.show()

    # TODO: plot filters
    print(conv1_filter_images.shape)
    # Transpose so the filter index becomes the leading axis.
    conv1_filter_images = conv1_filter_images.T
    for i in range(num_filters1):
        plt.subplot(4, 8, i + 1)
        plt.title("Filter " + str(i + 1), fontsize=6)
        plt.axis("off")
        plt.imshow(conv1_filter_images[i].T)
    plt.show()
def two_linear(self, xin, linear_size, residual, dropout_keep_prob, max_norm, batch_norm, dtype, idx):
  """
  Build one block of two linear layers, each followed by optional batch
  normalization, ReLU and dropout, with an optional skip connection.

  Args
    xin: the batch that enters the block
    linear_size: integer. Number of units of both linear layers
    residual: boolean. Whether to add `xin` to the block output
    dropout_keep_prob: value passed to tf.nn.dropout as the keep probability
    max_norm: boolean. Whether to clip each weight matrix with
      tf.clip_by_norm(w, 1)
    batch_norm: boolean. Whether to do batch normalization
    dtype: type of the weights. Usually tf.float32
    idx: integer. Number of the block (used for naming/scoping)
  Returns
    the batch after it leaves the block
  """
  suffix = str(idx)

  with vs.variable_scope("two_linear_" + suffix):
    in_features = int(xin.get_shape()[1])

    # Linear 1
    first_w = tf.get_variable(name="w2_" + suffix, initializer=kaiming,
                              shape=[in_features, linear_size], dtype=dtype)
    first_b = tf.get_variable(name="b2_" + suffix, initializer=kaiming,
                              shape=[linear_size], dtype=dtype)
    if max_norm:
      first_w = tf.clip_by_norm(first_w, 1)
    hidden = tf.matmul(xin, first_w) + first_b
    if batch_norm:
      hidden = tf.layers.batch_normalization(
          hidden, training=self.isTraining, name="batch_normalization1" + suffix)
    hidden = tf.nn.dropout(tf.nn.relu(hidden), dropout_keep_prob)

    # Linear 2
    second_w = tf.get_variable(name="w3_" + suffix, initializer=kaiming,
                               shape=[linear_size, linear_size], dtype=dtype)
    second_b = tf.get_variable(name="b3_" + suffix, initializer=kaiming,
                               shape=[linear_size], dtype=dtype)
    if max_norm:
      second_w = tf.clip_by_norm(second_w, 1)
    hidden = tf.matmul(hidden, second_w) + second_b
    if batch_norm:
      hidden = tf.layers.batch_normalization(
          hidden, training=self.isTraining, name="batch_normalization2" + suffix)
    hidden = tf.nn.dropout(tf.nn.relu(hidden), dropout_keep_prob)

    # Residual every 2 blocks
    if residual:
      return xin + hidden
    return hidden
#########################################################
"""Matrix Addition"""
# Two ways of adding the same pair of 3x3 constants: tf.add and the `+`
# operator. Both results are evaluated and printed.
graph2 = tf.Graph()
with graph2.as_default():
    m1 = tf.constant([[1, 0, 0],
                      [0, 1, 0],
                      [0, 0, 1]])
    m2 = tf.constant([[0, 0, 1],
                      [0, 0, 0],
                      [1, 0, 0]])
    m_sum = tf.add(m1, m2)
    m_sum2 = m1 + m2

with tf.Session(graph=graph2) as sess:
    for fetched in (m_sum, m_sum2):
        result = sess.run(fetched)
        print(result)

###################################################
"""matrix multiplication"""
# Multiply a 2x2 constant by the 2x2 identity and print the product.
graph3 = tf.Graph()
with graph3.as_default():
    m1 = tf.constant([[2, 2],
                      [3, 3]])
    m2 = tf.constant([[1, 0],
                      [0, 1]])
    m_mul = tf.matmul(m1, m2)

with tf.Session(graph=graph3) as sess:
    result = sess.run(m_mul)
    print(result)

#####################################################
# Fourth convolutional layer: bias and ReLU (its weights are defined earlier).
b_conv4 = bias_variable([64])
z_conv4 = conv2d(h_conv3, W_conv4, 1) + b_conv4
h_conv4 = tf.nn.relu(z_conv4)

# Fifth convolutional layer: 3x3 kernel, 64 -> 64 channels.
W_conv5 = weight_variable([3, 3, 64, 64])
b_conv5 = bias_variable([64])
z_conv5 = conv2d(h_conv4, W_conv5, 1) + b_conv5
h_conv5 = tf.nn.relu(z_conv5)

# Fully connected layer 1: flatten to 1152 features -> 1164 units, ReLU,
# then dropout.
W_fc1 = weight_variable([1152, 1164])
b_fc1 = bias_variable([1164])

h_conv5_flat = tf.reshape(h_conv5, [-1, 1152])
z_fc1 = tf.matmul(h_conv5_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(z_fc1)

# Keep probability shared by every dropout layer below.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Fully connected layer 2: 1164 -> 100 units, ReLU, then dropout.
W_fc2 = weight_variable([1164, 100])
b_fc2 = bias_variable([100])

z_fc2 = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
h_fc2 = tf.nn.relu(z_fc2)

h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

# Fully connected layer 3: parameters for 100 -> 50 units.
W_fc3 = weight_variable([100, 50])
b_fc3 = bias_variable([50])
def fit(net, img_shape, img_name, image_mode, type_measurements, num_measurements, y_feed, A_feed, mask_info1, ini_channel = 32, mask_feed = None, lr_decay_epoch=0, lr_decay_rate=0.65, LR=0.01, OPTIMIZER='adam', num_iter=5000, find_best=False, verbose=False, random_vector = None, selection_mask = None, save = False, random_array = None):
    """Fit the weights of `net` so that its output explains the measurements.

    A fresh graph is built in which `net` (wrapped with `tf.make_template`)
    maps a fixed, non-trainable uniform-noise tensor of shape
    (1, img_shape[1], img_shape[2], ini_channel) to an output `x`. A forward
    model turns `x` into predicted measurements `y_hat`, and the mean squared
    error between `y_feed` and `y_hat` is minimised with Adam.

    Forward models (selected by `mask_feed` / `type_measurements`):
      * `mask_feed` given: inpainting, `y_hat = x * mask_feed`.
      * 'circulant': flattened `x` times `random_array`, passed through
        ifft -> multiply by fft(random_vector) -> fft, then multiplied by
        `selection_mask`.
      * 'random' / 'identity': `reshape(x) @ A`, or just `reshape(x)` when
        `image_mode == '3D'`.

    Args:
      net: callable building the network from the noise tensor.
      img_shape: 4-element shape; indices 1..3 are read as the image dims.
      img_name, mask_info1, save: accepted for interface compatibility; not
        used by the active code.
      image_mode: '3D' disables the multiplication by `A`.
      type_measurements: 'random', 'identity' or 'circulant' (only consulted
        when `mask_feed` is None).
      num_measurements: number of columns of `A` for 'random'.
      y_feed: measurements fed to the `y` placeholder.
      A_feed: measurement matrix fed to the `A` placeholder when it is used.
      ini_channel: channel count of the input noise.
      mask_feed: inpainting mask, or None.
      lr_decay_epoch, lr_decay_rate: if `lr_decay_epoch > 0`, `LR` follows a
        staircase exponential decay with these settings.
      LR: learning rate.
      OPTIMIZER: only 'adam' is implemented.
      num_iter: number of optimisation steps.
      find_best: if True, return the output seen when the loss last improved
        by more than 0.5% instead of the final output.
      verbose: if True, also return the value of `get_num_params()`.
      random_vector, selection_mask, random_array: inputs of the circulant
        forward model.

    Returns:
      (mse, out_img) or, if `verbose`, (mse, out_img, num_params), where
      `mse` is the list of per-iteration loss values.

    Raises:
      NotImplementedError: if `OPTIMIZER == 'LBFGS'`.
      ValueError: for any other unknown optimizer, or an unknown
        `type_measurements` when `mask_feed` is None.
    """
    # Fail early with a clear message instead of an unbound-name error later.
    if mask_feed is None and type_measurements not in ('random', 'identity', 'circulant'):
        raise ValueError('Unknown type_measurements: {!r}'.format(type_measurements))
    if OPTIMIZER == 'LBFGS':
        raise NotImplementedError('LBFGS Optimizer')
    if OPTIMIZER != 'adam':
        raise ValueError('Unknown OPTIMIZER: {!r}'.format(OPTIMIZER))

    # True exactly when the forward model below multiplies by the placeholder A.
    uses_matrix = (mask_feed is None
                   and type_measurements != 'circulant'
                   and image_mode != '3D')

    with tf.Graph().as_default():
        # Global step
        global_step = tf.train.get_or_create_global_step()

        # Set up placeholders
        n_input = img_shape[1]*img_shape[2]*img_shape[3]
        width = int(img_shape[1])
        height = int(img_shape[2])
        if mask_feed is None:
            if type_measurements == 'random':  # compressed sensing with random matrix
                A = tf.placeholder(tf.float32, shape=(n_input, num_measurements), name='A')  # e.g. [img_wid*img_high*3, 200]
                y = tf.placeholder(tf.float32, shape=(1, num_measurements), name='y')  # e.g. [1, 200]
            elif type_measurements == 'identity':  # denoising
                if image_mode != '3D':
                    A = tf.placeholder(tf.float32, shape=(n_input, n_input), name='A')  # e.g. [img_wid*img_high*3, img_wid*img_high*3]
                y = tf.placeholder(tf.float32, shape=(1, n_input), name='y')  # e.g. [1, img_wid*img_high*3]
            elif type_measurements == 'circulant':  # compressed sensing with circulant matrix
                y = tf.placeholder(tf.float32, shape=(1, n_input), name='y')  # e.g. [1, img_wid*img_high*3]
        else:  # inpainting
            y = tf.placeholder(tf.float32, shape=(1, img_shape[1], img_shape[2], img_shape[3]), name='y')  # e.g. [1, img_wid, img_high, 3]

        # Define input uniform noise (kept fixed: the variable is not trainable)
        out = tf.constant(np.random.uniform(size=(1, width, height, ini_channel)).astype(np.float32) * 1. / 10)
        out = tf.Variable(out, name='input_noise', trainable=False)

        # Deep image prior
        feed_forward = tf.make_template("DeepImagePrior", net)
        x = feed_forward(out)  # network output, e.g. [1, img_wid, img_high, 3]

        # Inverse problem settings
        def circulant_tf(signal_vector, random_vector_m, selection_mask_m):
            # Partial circulant measurement implemented with FFTs.
            signal_vector = tf.cast(signal_vector, dtype=tf.complex64, name='circulant_real2complex')
            t = tf.convert_to_tensor(random_vector_m, dtype=tf.complex64)
            # step 1: F^{-1} @ x
            r1 = tf.signal.ifft(signal_vector, name='circulant_step1_ifft')
            # step 2: Diag() @ F^{-1} @ x
            Ft = tf.signal.fft(t)
            r2 = tf.multiply(r1, Ft, name='circulant_step2_diag')
            # step 3: F @ Diag() @ F^{-1} @ x
            compressive = tf.signal.fft(r2, name='circulant_step3_fft')
            float_compressive = tf.cast(compressive, tf.float32, name='circulant_complex2real')
            # step 4: R_{omega} @ C_{t}
            select_compressive = tf.multiply(float_compressive, selection_mask_m, name='circulant_step4_A')
            return select_compressive

        if mask_feed is None:
            # Compressed sensing & Denoising
            if type_measurements == 'circulant':
                # Compressed sensing with Circulant matrix
                flip = tf.convert_to_tensor(random_array, dtype=tf.float32)
                x_circulant = tf.reshape(x, [1,-1]) * flip
                y_hat = circulant_tf(x_circulant, random_vector, selection_mask)
            else:
                # Compressed sensing with Random matrix & Denoising
                if image_mode != '3D':
                    y_hat = tf.matmul(tf.reshape(x, [1,-1]), A)
                else:
                    y_hat = tf.reshape(x, [1,-1])
        else:
            # Inpainting
            y_hat = x * mask_feed

        # Define loss
        loss = tf.losses.mean_squared_error(y, y_hat)

        # Define learning rate
        if lr_decay_epoch > 0:
            LR = tf.train.exponential_decay(LR, global_step, lr_decay_epoch, lr_decay_rate, staircase=True)

        # Define optimizer (only 'adam' can reach this point)
        optimizer = tf.train.AdamOptimizer(LR)

        # Run pending update ops (e.g. batch-norm statistics) with each step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)

        # Set up gpu
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.85
        config.log_device_placement= True

        # The config above was previously built but never handed to the session.
        with tf.Session(config=config) as sess:
            # Init
            mse = [0.] * num_iter
            sess.run(tf.global_variables_initializer())

            # Initial network output
            if find_best:
                if not os.path.exists('log'):
                    os.makedirs('log/')
                if not os.path.exists('result'):
                    os.makedirs('result/')
                # Must be created before the graph is finalized below.
                saver = tf.train.Saver(max_to_keep=1)
                best_mse = 1000000.0
                best_img = sess.run(x)

            # Feed dict: y is always fed; A only when the forward model uses it.
            # This also covers 'random', for which no feed dict used to be built.
            feed_dict = {y: y_feed}
            if uses_matrix:
                feed_dict[A] = A_feed

            # Optimize
            num_params = get_num_params()
            sess.graph.finalize()

            for i in range(num_iter):
                loss_, _ = sess.run([loss, train_op], feed_dict=feed_dict)
                mse[i] = loss_

                # Best net: only accept improvements larger than 0.5%
                if find_best and best_mse > 1.005 * loss_:
                    best_mse = loss_
                    best_img = sess.run(x)

            # Return final image or best found so far if `find_best`
            if find_best:
                out_img = best_img
            else:
                out_img = sess.run(x)

            if verbose:
                return mse, out_img, num_params
            else:
                return mse, out_img
# Evaluate the tensors `y` and `z` built earlier in the script (not visible here).
with tf1.Session() as sess:
    print(y.eval())
    print(z.eval())

### Linear Regression with Tensorflow
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape
# Prepend a column of ones so the first coefficient acts as the bias term.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf1.constant(housing_data_plus_bias, dtype=tf1.float32, name="X")
# NOTE: this rebinds `y`, which named a different tensor above.
y = tf1.constant(housing.target.reshape(-1, 1), dtype=tf1.float32, name="y")
XT = tf1.transpose(X)
# Normal equation: theta = (X^T X)^-1 X^T y
theta = tf1.matmul(tf1.matmul(tf1.matrix_inverse(tf1.matmul(XT, X)), XT), y)

with tf1.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

### Gradient Descent
from sklearn.preprocessing import StandardScaler

# Standardise the features, then prepend the bias column again.
scaler = StandardScaler()
housing_scaled = scaler.fit_transform(housing.data)
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing_scaled]
# Bare expression: shows the first row in a notebook, has no effect in a script.
housing_data_plus_bias[0]

n_epochs = 1000
learning_rate = .01
import numpy as np

# Six training examples with two binary features each, and their one-hot
# labels over three classes.
x_data = np.array([
    [0, 0],
    [1, 0],
    [1, 1],
    [0, 0],
    [0, 0],
    [0, 1],
])
y_data = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
    [1, 0, 0],
    [1, 0, 0],
    [0, 0, 1],
])

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# Parameters of a 2 -> 10 -> 3 network: uniform(-1, 1) weights, zero biases.
W1 = tf.Variable(tf.random.uniform([2, 10], -1., 1.))
W2 = tf.Variable(tf.random.uniform([10, 3], -1., 1.))
b1 = tf.Variable(tf.zeros([10]))
b2 = tf.Variable(tf.zeros([3]))

# Hidden layer with ReLU, then a linear output layer producing logits.
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
model = tf.add(tf.matmul(L1, W2), b2)

# Mean softmax cross-entropy between the labels and the logits.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=model)
)

optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
train_op = optimizer.minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# 100 full-batch training steps.
train_feed = {X: x_data, Y: y_data}
for step in range(100):
    sess.run(train_op, feed_dict=train_feed)
# Softmax regression on MNIST: graph construction.
# `input_data` is imported earlier in the script (not visible here).
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
import tensorflow.compat.v1 as tf

learning_rate = 0.01
training_iteration = 30
batch_size = 100
display_step = 2

x = tf.placeholder("float", [None, 784])  # Input Vector
y = tf.placeholder("float", [None, 10])  # Output Vector

W = tf.Variable(tf.zeros([784, 10]))  # Weight Tensor
b = tf.Variable(tf.zeros([10]))  # Bias Tensor

with tf.name_scope("Wx_b") as scope:
    # Keep the raw logits so the loss can be computed without log(softmax).
    logits = tf.matmul(x, W) + b
    model = tf.nn.softmax(logits)

# Histogram summaries of the parameters.
w_h = tf.summary.histogram("weights", W)
b_h = tf.summary.histogram("biases", b)

with tf.name_scope("cost_function") as scope:
    # Summed cross-entropy. log_softmax is mathematically equal to
    # log(softmax(logits)) but cannot yield log(0) = -inf (and thus NaN)
    # when a probability underflows to zero.
    cost_function = -tf.reduce_sum(y * tf.nn.log_softmax(logits))
    tf.summary.scalar("cost_function", cost_function)

with tf.name_scope("train") as scope:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        cost_function)

# global_variables_initializer replaces the deprecated initialize_all_variables.
init = tf.global_variables_initializer()
merged_summary_op = tf.summary.merge_all()
import numpy as np
from sklearn.datasets import fetch_california_housing

# `reset_graph` is defined elsewhere in the file (not visible here).
reset_graph()

housing = fetch_california_housing()
m, n = housing.data.shape
print(m, n)
print(housing.target.reshape(-1, 1))

# Prepend a column of ones so the first coefficient acts as the bias term.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
print("XT", XT)

# Normal equation: theta = (X^T X)^-1 X^T y
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
print("theta", theta)

with tf.Session() as sess:
    theta_value = theta.eval()
    print("线性回归 theta_value \r\n", theta_value)

# Gradient descent
reset_graph()
n_epochs = 1000
learning_rate = 0.01

from sklearn.preprocessing import StandardScaler

# Standardise the features before gradient-based training.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
strides=[1, 1, h1, 1], padding='SAME') #output=545/4
# NOTE: the line above closes a call that starts before this excerpt.
#1 LAYER*************************************************************************************

#Rectifier LAYER*****************************************************************************
# Flattened feature count: product of dimensions 1..3 of the pooled output
coef = int(h_pool1.get_shape()[1] * h_pool1.get_shape()[2] * h_pool1.get_shape()[3])
h_pool2_flat = tf.reshape(h_pool1, [-1, coef])
# Fully connected weights: coef inputs -> w4 outputs (w4 is defined elsewhere)
W_fc1 = weight_variable([coef, w4])
b_fc1 = bias_variable([w4])
#rectifier (matmul)
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
#Rectifier LAYER*****************************************************************************

#Rectifier-Dropout LAYER**********************************************************************
#dropout
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Output layer weights: w4 inputs -> labelSize outputs
W_fc2 = weight_variable([w4, labelSize])
b_fc2 = bias_variable([labelSize])
# Output logits (no activation applied here)
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#Rectifier-Dropout LAYER**********************************************************************

#Loss Function********************************************************************************
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[0]))
# NOTE: the statement below continues past the end of this excerpt.
cross_entropy = tf.reduce_mean(
def attention_layer(from_tensor, to_tensor, attention_mask=None, num_attention_heads=1, size_per_head=512, query_act=None, key_act=None, value_act=None, attention_probs_dropout_prob=0.0, initializer_range=0.02, do_return_2d_tensor=False, batch_size=None, from_seq_length=None, to_seq_length=None):
    """Multi-headed attention from `from_tensor` to `to_tensor`.

    `from_tensor` is projected to queries and `to_tensor` to keys and values
    with three dense layers named "query", "key" and "value". Queries and
    keys are multiplied, scaled by 1/sqrt(size_per_head), optionally masked,
    and soft-maxed into attention probabilities. Those probabilities (after
    dropout) weight the values to form the context. When both inputs are the
    same tensor this is self-attention.

    Shape letters used in the comments:
      B = batch size, F = `from_tensor` sequence length,
      T = `to_tensor` sequence length, N = `num_attention_heads`,
      H = `size_per_head`.

    Args:
      from_tensor: float Tensor, [B, F, from_width] or its 2D flattening.
      to_tensor: float Tensor, [B, T, to_width] or its 2D flattening. Must
        have the same rank as `from_tensor`.
      attention_mask: (optional) Tensor of shape [B, F, T] holding 1 for
        positions to attend to and 0 for masked positions; masked positions
        get -10000.0 added to their score before the softmax.
      num_attention_heads: int. Number of attention heads.
      size_per_head: int. Size of each attention head.
      query_act: (optional) Activation for the query projection.
      key_act: (optional) Activation for the key projection.
      value_act: (optional) Activation for the value projection.
      attention_probs_dropout_prob: (optional) float passed to `dropout` for
        the attention probabilities.
      initializer_range: float passed to `create_initializer` for the kernels.
      do_return_2d_tensor: bool. If True the context is returned as
        [B*F, N*H], otherwise as [B, F, N*H].
      batch_size: (optional) int. Required when the inputs are 2D.
      from_seq_length: (optional) int. Required when the inputs are 2D.
      to_seq_length: (optional) int. Required when the inputs are 2D.

    Returns:
      A tuple `(context_layer, attention_probs)`: the context tensor in the
      layout selected by `do_return_2d_tensor`, and the post-dropout
      attention probabilities of shape [B, N, F, T].

    Raises:
      ValueError: if the input ranks differ, or the inputs are 2D and any of
        `batch_size`, `from_seq_length`, `to_seq_length` is missing.
    """
    projection_width = num_attention_heads * size_per_head

    def project(flat_tensor, activation, name):
        # Dense projection of a [rows, width] matrix to [rows, N*H].
        return tf.layers.dense(
            flat_tensor,
            projection_width,
            activation=activation,
            name=name,
            kernel_initializer=create_initializer(initializer_range))

    def split_heads(flat_tensor, seq_length):
        # [B*S, N*H] -> [B, S, N, H] -> [B, N, S, H]
        per_head = tf.reshape(
            flat_tensor,
            [batch_size, seq_length, num_attention_heads, size_per_head])
        return tf.transpose(per_head, [0, 2, 1, 3])

    from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
    to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])
    rank = len(from_shape)

    if rank != len(to_shape):
        raise ValueError(
            "The rank of `from_tensor` must match the rank of `to_tensor`.")

    if rank == 3:
        # 3D inputs carry their own batch and sequence dimensions.
        batch_size = from_shape[0]
        from_seq_length = from_shape[1]
        to_seq_length = to_shape[1]
    elif rank == 2:
        if (batch_size is None or from_seq_length is None or to_seq_length is None):
            raise ValueError(
                "When passing in rank 2 tensors to attention_layer, the values "
                "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                "must all be specified.")

    from_matrix = reshape_to_matrix(from_tensor)
    to_matrix = reshape_to_matrix(to_tensor)

    # Projections: queries [B*F, N*H], keys and values [B*T, N*H].
    queries = project(from_matrix, query_act, "query")
    keys = project(to_matrix, key_act, "key")
    values = project(to_matrix, value_act, "value")

    # queries -> [B, N, F, H], keys -> [B, N, T, H]
    queries = split_heads(queries, from_seq_length)
    keys = split_heads(keys, to_seq_length)

    # Scaled dot-product scores: [B, N, F, T]
    attention_scores = tf.matmul(queries, keys, transpose_b=True)
    attention_scores = tf.multiply(attention_scores,
                                   1.0 / math.sqrt(float(size_per_head)))

    if attention_mask is not None:
        # [B, F, T] -> [B, 1, F, T] so the mask broadcasts over heads.
        attention_mask = tf.expand_dims(attention_mask, axis=[1])
        # 0.0 where attention is allowed, -10000.0 where it is masked; added
        # before the softmax this effectively removes the masked positions.
        adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
        attention_scores += adder

    # Probabilities over the `to` positions: [B, N, F, T]
    attention_probs = tf.nn.softmax(attention_scores)
    # Dropout on the probabilities drops whole tokens to attend to.
    attention_probs = dropout(attention_probs, attention_probs_dropout_prob)

    # values -> [B, N, T, H]
    values = split_heads(values, to_seq_length)

    # Weighted sum of values: [B, N, F, H], then back to [B, F, N, H]
    context_layer = tf.matmul(attention_probs, values)
    context_layer = tf.transpose(context_layer, [0, 2, 1, 3])

    if do_return_2d_tensor:
        # [B*F, N*H]
        output_shape = [batch_size * from_seq_length,
                        num_attention_heads * size_per_head]
    else:
        # [B, F, N*H]
        output_shape = [batch_size, from_seq_length,
                        num_attention_heads * size_per_head]
    context_layer = tf.reshape(context_layer, output_shape)

    return context_layer, attention_probs
def batch_loss(model, batch, num_classes=10):
    """Return the mean softmax cross-entropy of a linear model on one batch.

    Args:
      model: object exposing `weights` and `bias`; the logits are
        `batch.x @ model.weights + model.bias`.
      batch: object exposing `x` (inputs) and `y` (integer class indices,
        one-hot encoded here).
      num_classes: depth of the one-hot encoding. Defaults to 10, the value
        that used to be hard-coded.

    Returns:
      Scalar tensor: the per-example cross-entropy averaged over the batch.
    """
    logits = tf.matmul(batch.x, model.weights) + model.bias
    # log_softmax equals log(softmax(logits)) mathematically, but avoids
    # log(0) = -inf (and 0 * -inf = NaN) when a probability underflows.
    log_probs = tf.nn.log_softmax(logits)
    one_hot_labels = tf.one_hot(batch.y, num_classes)
    per_example = tf.reduce_sum(one_hot_labels * log_probs, axis=[1])
    return -tf.reduce_mean(per_example)
def GetProjectLastDim(cls, inputs, weight, input_dim, output_dim, proj_obj):
    """Linear projection on the last dim of the input tensor along with pruning.

    This is a TPU efficient implementation to avoid reshaping inputs to Rank-2
    tensor by using Einsum for the compute.

    Two code paths exist:
      * On TPU, with a statically known rank below 26: rank-2 inputs are
        multiplied by `weight` directly, anything else is delegated to
        `cls.GetEinSumResult(inputs, proj_obj)`.
      * Otherwise: the inputs are flattened (optionally block-compressed with
        `theta.b_matrix_tfvar`), multiplied by `theta.c_matrix_tfvar`,
        optionally expanded with `theta.d_matrix_tfvar`, and reshaped back to
        the leading dimensions of `inputs` plus `output_dim`. `weight` is not
        used on this path.

    Args:
      inputs: An input Tensor, the last dimension of which is input_dim.
      weight: A weight matrix with shape [input_dim, output_dim].
      input_dim: An integer or a symbolic dim, the last dimension of the inputs.
      output_dim: An integer or a symbolic dim, the last dimension of the
        outputs.
      proj_obj: a ProjectionLayer object; its `theta` and `params` are read.

    Returns:
      An output Tensor of the same rank as inputs, the last dimension is
      output_dim.
    """
    theta = proj_obj.theta
    p = proj_obj.params
    # Resolve symbolic dimensions to plain Python ints.
    input_dim = int(
        symbolic.ToStatic(input_dim) if symbolic.IsExpr(input_dim
                                                       ) else input_dim)
    output_dim = int(
        symbolic.ToStatic(output_dim) if symbolic.IsExpr(output_dim
                                                        ) else output_dim)
    if (py_utils.use_tpu() and inputs.shape is not None and
            inputs.shape.rank is not None and inputs.shape.rank < 26):
        # Avoids reshape if feasible and uses Einsum.
        if inputs.shape.rank == 2:
            outputs = tf.matmul(inputs, weight)
        else:
            outputs = cls.GetEinSumResult(inputs, proj_obj)
    else:
        if p.pruning_hparams_dict['compress_input']:
            # Split the inputs into blocks and compress each block with b_matrix.
            blocked_inputs = tf.reshape(
                inputs,
                py_utils.ToStaticShape(
                    [-1, p.pruning_hparams_dict['input_block_size']]))
            compressed_inputs = tf.reshape(
                py_utils.Matmul(blocked_inputs, theta.b_matrix_tfvar),
                py_utils.ToStaticShape([
                    -1, input_dim //
                    p.pruning_hparams_dict['input_compression_factor']
                ]))
        else:
            # No input compression: just flatten to [-1, input_dim].
            compressed_inputs = tf.reshape(inputs,
                                           py_utils.ToStaticShape([-1, input_dim]))

        intermediate_result = py_utils.Matmul(compressed_inputs,
                                              theta.c_matrix_tfvar)
        if p.pruning_hparams_dict['compress_output']:
            # Re-block the intermediate result and expand it with d_matrix.
            blocked_intermediate_result = tf.reshape(
                intermediate_result,
                py_utils.ToStaticShape([
                    -1, p.pruning_hparams_dict['output_block_size'] //
                    p.pruning_hparams_dict['output_compression_factor']
                ]))
            outputs = py_utils.Matmul(blocked_intermediate_result,
                                      theta.d_matrix_tfvar)
        else:
            outputs = intermediate_result
        # Restore the leading dimensions of `inputs`, with output_dim last.
        outputs = tf.reshape(
            outputs,
            tf.concat([
                tf.cast(py_utils.GetShape(inputs)[:-1], tf.int32),
                py_utils.ToStaticShape([output_dim])
            ],
                      axis=0))
    return outputs
def build_model(self, task_id, prediction=False, splitting=False, expansion=None):
    """Build the feed-forward network for `task_id` in one of four modes.

    The mode is chosen by the first truthy flag, in this order:
      * `splitting`: rebuild every hidden layer from NumPy copies of the
        previous (`self.prev_W_split`) and current (`self.cur_W`) weights,
        appending extra units for the indices in `self.unit_indices`.
      * `expansion`: fetch enlarged parameters through `extend_bottom` /
        `extend_param`, using `self.ex_k`.
      * `prediction`: reuse the stored variables, sliced to the sizes recorded
        in `self.time_stamp` for this task.
      * otherwise: reuse the trainable hidden-layer variables and create a new
        output layer for the task.

    Sets `self.y` (logits), `self.yhat` (sigmoid of the logits) and
    `self.loss` (mean sigmoid cross-entropy against `self.Y`).

    Args:
      task_id: integer used to name / look up the task-specific output layer.
      prediction: build the inference graph for an already trained task.
      splitting: build the graph with split (duplicated) units.
      expansion: truthy to build the graph with expanded layers.
    """
    bottom = self.X
    if splitting:
        for i in range(1, self.n_layers):
            prev_w = np.copy(self.prev_W_split['layer%d' % i + '/weight:0'])
            cur_w = np.copy(self.cur_W['layer%d' % i + '/weight:0'])
            indices = self.unit_indices['layer%d' % i]
            next_dim = prev_w.shape[1]
            if 2 <= i < self.n_layers:
                # The layer below gained units, so first extend the rows
                # (input side) of this layer's weights to match.
                below_dim = prev_w.shape[0]
                below_indices = self.unit_indices['layer%d' % (i - 1)]
                bottom_p_prev_ary, bottom_p_new_ary, bottom_c_prev_ary, bottom_c_new_ary = [], [], [], []
                for j in range(below_dim):
                    if j in below_indices:
                        bottom_p_prev_ary.append(prev_w[j, :])
                        bottom_p_new_ary.append(cur_w[j, :])
                        bottom_c_prev_ary.append(cur_w[j, :])
                        bottom_c_new_ary.append(cur_w[j, :])
                    else:
                        bottom_p_prev_ary.append(cur_w[j, :])
                        bottom_c_prev_ary.append(cur_w[j, :])
                prev_w = np.array(bottom_p_prev_ary + bottom_p_new_ary).astype(np.float32)
                cur_w = np.array(bottom_c_prev_ary + bottom_c_new_ary).astype(np.float32)

            # Extend the columns (output side): units listed in `indices` keep
            # their previous weights and get an appended copy holding the
            # current weights; all other units take the current weights.
            prev_ary = []
            new_ary = []
            for j in range(next_dim):
                if j in indices:
                    prev_ary.append(prev_w[:, j])
                    new_ary.append(cur_w[:, j])  # will be expanded
                else:
                    prev_ary.append(cur_w[:, j])
            # fully connected, L1
            expanded_w = np.array(prev_ary + new_ary).T.astype(np.float32)
            # Biases of the appended units are drawn uniformly from [0, 1).
            expanded_b = np.concatenate(
                (self.prev_W_split['layer%d' % i + '/biases:0'],
                 np.random.rand(len(new_ary)))).astype(np.float32)
            with tf.variable_scope('layer%d' % i):
                w = tf.get_variable('weight', initializer=expanded_w, trainable=True)
                b = tf.get_variable('biases', initializer=expanded_b, trainable=True)
            self.params[w.name] = w
            self.params[b.name] = b
            bottom = tf.nn.relu(tf.matmul(bottom, w) + b)
        # `new_ary` here is the one left over from the last hidden layer.
        w, b = self.extend_top('layer%d' % self.n_layers, len(new_ary))
        self.y = tf.matmul(bottom, w) + b
    elif expansion:
        for i in range(1, self.n_layers):
            if i == 1:
                w, b = self.extend_bottom('layer%d' % i, self.ex_k)
            else:
                w, b = self.extend_param('layer%d' % i, self.ex_k)
            bottom = tf.nn.relu(tf.matmul(bottom, w) + b)
        w, b = self.extend_param('layer%d' % self.n_layers, self.ex_k)
        self.y = tf.matmul(bottom, w) + b
    elif prediction:
        # Slice every layer down to the sizes recorded for this task.
        stamp = self.time_stamp['task%d' % task_id]
        for i in range(1, self.n_layers):
            w = self.get_variable('layer%d' % i, 'weight', False)
            b = self.get_variable('layer%d' % i, 'biases', False)
            w = w[:stamp[i - 1], :stamp[i]]
            b = b[:stamp[i]]
            # NOTE(review): the message says "task" but prints the layer index i.
            print(' [*] task %d, shape : %s' % (i, w.get_shape().as_list()))
            bottom = tf.nn.relu(tf.matmul(bottom, w) + b)
        w = self.get_variable('layer%d' % self.n_layers, 'weight_%d' % task_id, False)
        b = self.get_variable('layer%d' % self.n_layers, 'biases_%d' % task_id, False)
        w = w[:stamp[self.n_layers - 1], :stamp[self.n_layers]]
        b = b[:stamp[self.n_layers]]
        self.y = tf.matmul(bottom, w) + b
    else:
        for i in range(1, self.n_layers):
            w = self.get_variable('layer%d' % i, 'weight', True)
            b = self.get_variable('layer%d' % i, 'biases', True)
            bottom = tf.nn.relu(tf.matmul(bottom, w) + b)
        # New task-specific output layer sized from the last hidden layer.
        prev_dim = bottom.get_shape().as_list()[1]
        w = self.create_variable('layer%d' % self.n_layers, 'weight_%d' % task_id,
                                 [prev_dim, self.n_classes], True)
        b = self.create_variable('layer%d' % self.n_layers, 'biases_%d' % task_id,
                                 [self.n_classes], True)
        self.y = tf.matmul(bottom, w) + b

    self.yhat = tf.nn.sigmoid(self.y)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=self.y, labels=self.Y))

    if prediction:
        return
def testConcurrentNumericsAlertsAreRegisteredCorrectly(self):
    """Numerics alerts from concurrent Session.run calls are all counted.

    Three threads each run tensor `v` twice, every thread streaming debug
    data to its own gRPC URL path. The server must then report exactly two
    alerts: one for `u` (+Inf only) and one for `v` (NaN and -Inf), each
    counted once per run.
    """
    num_threads = 3
    num_runs_per_thread = 2
    total_num_runs = num_threads * num_runs_per_thread

    # Before any Session runs, the report ought to be empty.
    self.assertEqual([], self._debug_data_server.numerics_alert_report())

    with tf.Session() as sess:
        x_init_val = np.array([[2.0], [-1.0]])
        y_init_val = np.array([[0.0], [-0.25]])
        z_init_val = np.array([[0.0, 3.0], [-1.0, 0.0]])

        x_init = tf.constant(x_init_val, shape=[2, 1], name="x_init")
        x = tf.Variable(x_init, name="x")
        y_init = tf.constant(y_init_val, shape=[2, 1])
        y = tf.Variable(y_init, name="y")
        z_init = tf.constant(z_init_val, shape=[2, 2])
        z = tf.Variable(z_init, name="z")

        # u = [[2/0], [-1/-0.25]] = [[+Inf], [4]]
        u = tf.div(x, y, name="u")  # Produces an Inf.
        # v = z @ u = [[0*Inf + 3*4], [-1*Inf + 0*4]] = [[NaN], [-Inf]]
        v = tf.matmul(z, u, name="v")  # Produces NaN and Inf.

        sess.run(x.initializer)
        sess.run(y.initializer)
        sess.run(z.initializer)

        run_options_list = []
        for i in range(num_threads):
            run_options = tf.RunOptions(output_partition_graphs=True)
            # Use different grpc:// URL paths so that each thread opens a separate
            # gRPC stream to the debug data server, simulating multi-worker setting.
            tf_debug.watch_graph(
                run_options,
                sess.graph,
                debug_ops=["DebugNumericSummary"],
                debug_urls=[self._debug_url + "/thread%d" % i])
            run_options_list.append(run_options)

        def run_v(thread_id):
            for _ in range(num_runs_per_thread):
                sess.run(v, options=run_options_list[thread_id])

        run_threads = []
        for thread_id in range(num_threads):
            thread = threading.Thread(
                target=functools.partial(run_v, thread_id))
            thread.start()
            run_threads.append(thread)

        for thread in run_threads:
            thread.join()

    report = self._debug_data_server.numerics_alert_report()
    self.assertEqual(2, len(report))

    # First alert: u contains a single +Inf per run.
    self.assertTrue(report[0].device_name.lower().endswith("cpu:0"))
    self.assertEqual("u:0", report[0].tensor_name)
    self.assertGreater(report[0].first_timestamp, 0)
    self.assertEqual(0, report[0].nan_event_count)
    self.assertEqual(0, report[0].neg_inf_event_count)
    self.assertEqual(total_num_runs, report[0].pos_inf_event_count)

    # Second alert: v contains one NaN and one -Inf per run. The tensor-name
    # check previously re-tested report[0] and never looked at report[1].
    self.assertTrue(report[1].device_name.lower().endswith("cpu:0"))
    self.assertEqual("v:0", report[1].tensor_name)
    self.assertGreaterEqual(report[1].first_timestamp,
                            report[0].first_timestamp)
    self.assertEqual(total_num_runs, report[1].nan_event_count)
    self.assertEqual(total_num_runs, report[1].neg_inf_event_count)
    self.assertEqual(0, report[1].pos_inf_event_count)
def create_model(
    albert_config,
    is_training,
    input_ids,
    input_mask,
    segment_ids,
    labels,
    num_labels,
    use_one_hot_embeddings,
    max_seq_length,
    dropout_prob,
    hub_module,
):
    """Creates a classification model that scores `num_labels` candidates.

    The three id tensors are reshaped to
    [batch * num_labels, max_seq_length], encoded by
    `fine_tuning_utils.create_albert`, and every encoded row is reduced to a
    single score by a [1, hidden_size] weight vector plus a bias. The scores
    are reshaped to [batch, num_labels] and trained with softmax
    cross-entropy against `labels`.

    Returns:
      A tuple (loss, per_example_loss, probabilities, logits, predictions).
    """
    batch = tf.shape(input_ids)[0]

    # Fold the candidate dimension into the batch dimension.
    flat_input_ids = tf.reshape(input_ids, [batch * num_labels, max_seq_length])
    flat_input_mask = tf.reshape(input_mask, [batch * num_labels, max_seq_length])
    flat_segment_ids = tf.reshape(segment_ids, [batch * num_labels, max_seq_length])

    encoded, _ = fine_tuning_utils.create_albert(
        albert_config=albert_config,
        is_training=is_training,
        input_ids=flat_input_ids,
        input_mask=flat_input_mask,
        segment_ids=flat_segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        use_einsum=True,
        hub_module=hub_module,
    )

    hidden_size = encoded.shape[-1].value

    # One scalar score per encoded row.
    output_weights = tf.get_variable(
        "output_weights",
        [1, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02),
    )
    output_bias = tf.get_variable(
        "output_bias", [1], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # Dropout is applied only while training.
            encoded = tf.nn.dropout(encoded, keep_prob=1 - dropout_prob)

        scores = tf.matmul(encoded, output_weights, transpose_b=True)
        scores = tf.nn.bias_add(scores, output_bias)
        # Unfold the candidates again: one row of scores per example.
        logits = tf.reshape(scores, [batch, num_labels])

        probabilities = tf.nn.softmax(logits, axis=-1)
        predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)

        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(
            labels, depth=tf.cast(num_labels, dtype=tf.int32), dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, probabilities, logits, predictions)
def connector_capsule_mat(input_tensor, position_grid, input_activation, input_dim, output_dim, layer_name, num_routing=3, num_in_atoms=3, num_out_atoms=3, leaky=False, final_beta=1.0, min_var=0.0005):
    """Final Capsule Layer with Pose Matrices and Shared connections.

    The input poses are multiplied by a per-input-capsule weight tensor, a
    zero-padded copy of `position_grid` is added to the resulting votes, and
    the votes are routed with `update_em_routing`.

    Args:
      input_tensor: rank-5 pose tensor; the comments below describe it as
        [x, 128, 8, h, w] - TODO confirm against the caller.
      position_grid: rank-3 tensor whose first two dims are read as height
        and width. NOTE(review): the concat with `zero_grid` implies its last
        dimension is 2 - confirm.
      input_activation: activations of the input capsules; four trailing unit
        dimensions are appended before routing.
      input_dim: number of input capsule types.
      output_dim: number of output capsule types.
      layer_name: variable scope name of the layer.
      num_routing: forwarded to `update_em_routing`.
      num_in_atoms: size of the last dimension the poses are reshaped to.
      num_out_atoms: side of the output pose; squared inside the function.
      leaky: forwarded to `update_em_routing`.
      final_beta: divided by 4 and forwarded to `update_em_routing`.
      min_var: forwarded to `update_em_routing`.

    Returns:
      (sigmoid of the squeezed output activations, squeezed output centers).
    """
    # One weight tensor for each capsule of the layer bellow: w: [8*128, 8*10]
    with tf.variable_scope(layer_name):
        # This Variable will hold the state of the weights for the layer
        with tf.name_scope('input_center_connector'):
            utils.activation_summary(input_tensor)
        # NOTE(review): the matmul below requires num_in_atoms == num_out_atoms,
        # because the weights use num_out_atoms as their contraction dimension.
        weights = utils.weight_variable(
            [input_dim, num_out_atoms, output_dim * num_out_atoms], stddev=0.01)
        # weights = tf.clip_by_norm(weights, 1.0, axes=[1])
        activation_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1],
                                                init_value=1.0,
                                                name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1],
                                           init_value=2.0,
                                           name='sigma_biases')

        with tf.name_scope('Wx_plus_b'):
            # input_tensor: [x, 128, 8, h, w]
            input_shape = tf.shape(input_tensor)
            # Bring the capsule-type axis first and the atom axis last.
            input_trans = tf.transpose(input_tensor, [1, 0, 3, 4, 2])
            input_share = tf.reshape(input_trans, [input_dim, -1, num_in_atoms])
            # input_expanded: [x, 128, 8, 1]
            wx_share = tf.matmul(input_share, weights)
            # sqr_num_out_atoms = num_out_atoms
            # From here on num_out_atoms is the squared value (full pose size).
            num_out_atoms *= num_out_atoms
            wx_trans = tf.reshape(wx_share, [
                input_dim, input_shape[0], input_shape[3], input_shape[4],
                num_out_atoms, output_dim
            ])
            # Restore the static shape information lost by the dynamic reshape.
            wx_trans.set_shape(
                (input_dim, None, input_tensor.get_shape()[3],
                 input_tensor.get_shape()[4], num_out_atoms, output_dim))
            h, w, _ = position_grid.get_shape()
            height = h
            width = w
            # t_pose = tf.transpose(position_grid, [2, 0, 1])
            # t_pose_exp = tf.scatter_nd([[sqr_num_out_atoms -1],
            #   [2 * sqr_num_out_atoms - 1]], t_pose, [num_out_atoms, height, width])
            # pose_g_exp = tf.transpose(t_pose_exp, [1, 2, 0])
            # Pad the position grid with zeros up to num_out_atoms channels.
            zero_grid = tf.zeros([height, width, num_out_atoms - 2])
            pose_g_exp = tf.concat([position_grid, zero_grid], axis=2)
            # Add broadcast axes: [1, 1, height, width, num_out_atoms, 1]
            pose_g = tf.expand_dims(
                tf.expand_dims(tf.expand_dims(pose_g_exp, -1), 0), 0)
            wx_posed = wx_trans + pose_g
            wx_posed_t = tf.transpose(wx_posed, [1, 0, 2, 3, 5, 4])

            # Wx_reshaped: [x, 128, 10, 8]
            wx = tf.reshape(wx_posed_t, [
                -1, input_dim * height * width, output_dim, num_out_atoms, 1, 1
            ])

        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10]
            logit_shape = [input_dim * height * width, output_dim, 1, 1, 1]
            # Append four trailing unit dimensions to the activations.
            for _ in range(4):
                input_activation = tf.expand_dims(input_activation, axis=-1)
            activation, center = update_em_routing(
                wx=wx,
                input_activation=input_activation,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms,
                num_routing=num_routing,
                output_dim=output_dim,
                leaky=leaky,
                final_beta=final_beta / 4,
                min_var=min_var,
            )
        # Drop the unit dimensions kept for broadcasting during routing.
        out_activation = tf.squeeze(activation, axis=[1, 3, 4, 5])
        out_center = tf.squeeze(center, axis=[1, 4, 5])
        return tf.sigmoid(out_activation), out_center
# Playground: S=Start, G=Goal, F=Frozen, H=Hole # SFFF # FHFH # FFFH # HFFG # hyper parameters EPISODES = 20000 LEARNING_RATE = 0.1 DISCOUNT_FACTOR = 0.99 EPSILON = 0.1 # 16x4 network definition input_state = tf.placeholder(tf.float32, shape=(1, 16)) weights = tf.Variable(tf.random_uniform([16, 4], 0, 0.01)) output_Q = tf.matmul(input_state, weights) predicted_action = tf.argmax(output_Q, 1) # loss function next_Q = tf.placeholder(tf.float32, shape=(1, 4)) loss = tf.reduce_sum(tf.square(next_Q - output_Q)) # optimization optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE) train = optimizer.minimize(loss) # training init = tf.global_variables_initializer() with tf.Session() as session: session.run(init)
def call(self, inputs, state):
    """Run one step of the cell.

    Concatenates `inputs` and `state` along axis 1, applies the affine map
    given by `self._weights` and `self._bias`, then `self._activation`.

    Returns:
      A pair (output, new_state); both elements are the same tensor.
    """
    joined = tf.concat([inputs, state], axis=1)
    pre_activation = tf.nn.bias_add(
        tf.matmul(joined, self._weights), self._bias)
    new_state = self._activation(pre_activation)
    return new_state, new_state
def model_fn(model, features, labels, mode):
    """Build the embedding tower under the "follower" scope and its spec.

    Every (slot, bucket_size) pair in `_SLOT_2_BUCKET` gets an
    8-dimensional embedding vector. The vectors are concatenated and passed
    through three dense layers (512, 256, 128 units), each followed by ReLU
    and batch normalisation.

    Args:
      model: project model object; this function uses its
        `add_feature_slot`, `add_feature_column`, `freeze_slots`, `send`,
        `minimize` and `make_spec` methods.
      features: passed to `model.freeze_slots`.
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.

    Returns:
      The result of `model.make_spec` for TRAIN and PREDICT. Any other mode
      falls through and returns None, as in the original code.
    """
    global_step = tf.train.get_or_create_global_step()
    xavier_initializer = tf.glorot_normal_initializer()

    # Defaults for the slots created below.
    flt.feature.FeatureSlot.set_default_bias_initializer(
        tf.zeros_initializer())
    flt.feature.FeatureSlot.set_default_vec_initializer(
        tf.random_uniform_initializer(-0.0078125, 0.0078125))
    flt.feature.FeatureSlot.set_default_bias_optimizer(
        tf.train.FtrlOptimizer(learning_rate=0.01))
    flt.feature.FeatureSlot.set_default_vec_optimizer(
        tf.train.AdagradOptimizer(learning_rate=0.01))

    # One embedding vector per input slot.
    embed_dim = 8
    slot_embeddings = []
    with tf.variable_scope("follower"):
        for slot, bucket_size in _SLOT_2_BUCKET:
            fs = model.add_feature_slot(slot, bucket_size)
            fc = model.add_feature_column(fs)
            slot_embeddings.append(fc.add_vector(embed_dim))

    # Concatenate all embeddings into the tower input.
    concat_embedding = tf.concat(slot_embeddings, axis=1)
    output_size = len(slot_embeddings) * embed_dim

    model.freeze_slots(features)

    # Dense-layer parameters w1/b1 .. w3/b3, created in that order.
    layer_dims = (output_size, 512, 256, 128)
    dense_params = []
    with tf.variable_scope("follower"):
        for idx, (fan_in, fan_out) in enumerate(
                zip(layer_dims[:-1], layer_dims[1:]), start=1):
            weight = tf.get_variable(
                'w{}'.format(idx),
                shape=[fan_in, fan_out],
                dtype=tf.float32,
                initializer=xavier_initializer)
            bias = tf.get_variable(
                'b{}'.format(idx),
                shape=[fan_out],
                dtype=tf.float32,
                initializer=tf.zeros_initializer())
            dense_params.append((weight, bias))

    # NOTE(review): batch normalisation is built with training=True in every
    # mode, so PREDICT also normalises with the statistics of the current
    # batch. Left unchanged to preserve existing behaviour.
    embedding = concat_embedding
    for weight, bias in dense_params:
        embedding = tf.nn.relu(
            tf.nn.bias_add(tf.matmul(embedding, weight), bias))
        embedding = tf.layers.batch_normalization(embedding, training=True)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # The value returned by model.send is used as grad_loss below;
        # presumably it is the gradient for the sent embedding (TODO confirm).
        embedding_grad = model.send('embedding', embedding,
                                    require_grad=True)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        train_op = model.minimize(
            optimizer,
            embedding,
            grad_loss=embedding_grad,
            global_step=global_step)
        return model.make_spec(
            mode, loss=tf.math.reduce_mean(embedding), train_op=train_op)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        return model.make_spec(mode, predictions={'embedding': embedding})