def testChainOfMatmul(self):
    """Check MaxBytesInUse after running two chained matmuls on the GPU.

    Builds ``(a @ b) @ b`` from two random 64x64 float32 matrices, runs it,
    and asserts that the reported maximum is at least three matrices' worth
    of bytes and strictly less than four.
    """
    # MaxBytesInUse is registered on GPU only. See kernels/memory_stats_ops.cc.
    if not test.is_gpu_available():
        return

    with self.test_session(use_gpu=True) as sess:
        side = 64
        shape = tensor_shape.TensorShape([side, side])
        elem_type = dtypes.float32
        bytes_per_matrix = shape.num_elements() * elem_type.size

        lhs = random_ops.random_uniform(shape, dtype=elem_type)
        rhs = random_ops.random_uniform(shape, dtype=elem_type)
        first_product = math_ops.matmul(lhs, rhs)
        second_product = math_ops.matmul(first_product, rhs)
        sess.run(second_product)

        # Read the statistic only after the matmul chain has been executed.
        peak_bytes = sess.run(memory_stats_ops.MaxBytesInUse())
        self.assertGreaterEqual(peak_bytes, bytes_per_matrix * 3)
        self.assertLess(peak_bytes, bytes_per_matrix * 4)
def testChainOfMatmul(self):
    """Check MaxBytesInUse and BytesInUse around small matmul graphs on GPU.

    First runs ``(a @ b) @ b`` on random 64x64 float32 matrices and asserts
    that MaxBytesInUse lies in ``[3, 4)`` matrices' worth of bytes. Then runs
    a single matmul with a BytesInUse probe ordered between the creation of
    its two operands, asserting that the probe reads ``[1, 2)`` matrices and
    that MaxBytesInUse is still at least three matrices.
    """
    # MaxBytesInUse is registered on GPU only. See kernels/memory_stats_ops.cc.
    if not test.is_gpu_available():
        return

    with self.test_session(use_gpu=True) as sess:
        side = 64
        shape = tensor_shape.TensorShape([side, side])
        elem_type = dtypes.float32
        bytes_per_matrix = shape.num_elements() * elem_type.size

        lhs = random_ops.random_uniform(shape, dtype=elem_type)
        rhs = random_ops.random_uniform(shape, dtype=elem_type)
        first_product = math_ops.matmul(lhs, rhs)
        second_product = math_ops.matmul(first_product, rhs)
        sess.run(second_product)

        peak_op = memory_stats_ops.MaxBytesInUse()
        peak_bytes = sess.run(peak_op)
        self.assertGreaterEqual(peak_bytes, bytes_per_matrix * 3)
        self.assertLess(peak_bytes, bytes_per_matrix * 4)

        # Run a chain with 2 ops and make sure BytesInUse captures the
        # intermediate memory usage: the probe runs after the first operand
        # exists and before the second operand and the matmul are computed.
        lhs = random_ops.random_uniform(shape, dtype=elem_type)
        with ops.control_dependencies([lhs]):
            live_op = memory_stats_ops.BytesInUse()
        with ops.control_dependencies([live_op]):
            rhs = random_ops.random_uniform(shape, dtype=elem_type)
            product = math_ops.matmul(lhs, rhs)

        _, live_bytes, peak_bytes = sess.run([product, live_op, peak_op])

        # The intermediate result allocates 1 matrix; max usage is at least 2.
        self.assertGreaterEqual(live_bytes, bytes_per_matrix * 1)
        self.assertLess(live_bytes, bytes_per_matrix * 2)

        # Max usage is still 3 because it reflects the maximum from the
        # previous .run call.
        self.assertGreaterEqual(peak_bytes, bytes_per_matrix * 3)
import numpy as np

# Constant stand-in inputs: an all-ones batch of flattened 200x200 images and
# all-ones labels, fed on every step instead of real dataset batches.
picture = np.ones([batch_size, 200 * 200], dtype=np.float32)
picture_label = np.ones([batch_size], dtype=np.float32)

# Build the memory probe once, before the session. Calling
# memory_stats_ops.MaxBytesInUse() inside the training loop would add a new
# node to the graph on every step.
max_bytes_in_use_op = memory_stats_ops.MaxBytesInUse()

with tf.Session(config=config) as sess:
    init.run()
    for epoch in range(n_epochs):
        # for iteration in range(mnist.train.num_examples // batch_size):
        for iteration in range(5):
            # X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: picture, y: picture_label})
            # , options=run_options, run_metadata=run_metadata)

            # Report the probe's value in units of 1e6 bytes after each step.
            max_bytes_in_use = sess.run(max_bytes_in_use_op) / 1e6
            print("step:%i, Max Memory used: %.2f MB " %
                  (iteration, max_bytes_in_use))

            # Disabled per-step profiling; it needs the run_options /
            # run_metadata arguments above to be re-enabled first.
            #
            # for device in run_metadata.step_stats.dev_stats:
            #     device_name = device.device
            #     print(".........device:", device.device)
            #     for node in device.node_stats:
            #         print("   ................node_stats:", str(node))
            # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            # chrome_trace = fetched_timeline.generate_chrome_trace_format()
            # with open('timeline_step_%d.json' % iteration, 'w') as f:
            #     f.write(chrome_trace)
def main(_):
    """Train the MNIST deep net for 30 steps with Large Model Support enabled.

    Builds the model, loss, Adam update and accuracy ops, runs the LMS graph
    rewrite seeded with the 'adam_optimizer' scope, writes the graph to a
    fresh temporary directory, and then trains on batches of 2000 examples.
    Every 10th step it prints the training accuracy and the value of
    MaxBytesInUse divided by 1e6; after training it prints the test accuracy.

    Args:
      _: Ignored; the function never reads its argument.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels=y_, logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        # Gradients are built explicitly (rather than via minimize()) so the
        # gradient ops are created inside this name scope.
        optimizer = tf.train.AdamOptimizer()
        train_vars = tf.trainable_variables()
        grads = tf.gradients(cross_entropy, train_vars)
        grads_and_vars = list(zip(grads, train_vars))
        train_step = optimizer.apply_gradients(grads_and_vars)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    # Enable Large Model Support.
    # An alternative configuration that was tried here:
    #   LMS({'adam_optimizer'}, excl_scopes={'loss', 'accuracy', 'dropout'}, lb=3)
    from tensorflow_large_model_support import LMS
    lms_model = LMS({'adam_optimizer'})
    lms_model.run(graph=tf.get_default_graph())

    graph_location = tempfile.mkdtemp()
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    # Nothing else is written through this writer, so close it right away;
    # closing flushes the queued graph event to disk.
    train_writer.close()

    # Create the memory probe once, outside the training loop.
    max_use = memory_stats_ops.MaxBytesInUse()

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(30):
            batch = mnist.train.next_batch(2000)
            if i % 10 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
                print(sess.run(max_use) / 1e6)
            train_step.run(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
}, options=run_options, run_metadata=run_metadata) if iteration % check_interval == 0: loss_val = loss.eval(feed_dict={ X: mnist.validation.images, y: mnist.validation.labels }) if loss_val < best_loss_val: best_loss_val = loss_val checks_since_last_progress = 0 best_model_params = get_model_params() else: checks_since_last_progress += 1 max_bytes_in_use = sess.run( memory_stats_ops.MaxBytesInUse()) / 1e6 print("Max Memory used: %.2f MB " % (max_bytes_in_use)) #mem_use = mem_util.peak_memory(run_metadata)['/gpu:0']/1e6 #print("Memory used: %.2f MB "%(mem_use)) acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch}) acc_val = accuracy.eval(feed_dict={ X: mnist.validation.images, y: mnist.validation.labels }) print( "Epoch {}, train accuracy: {:.4f}%, valid. accuracy: {:.4f}%, valid. best loss: {:.6f}" .format(epoch, acc_train * 100, acc_val * 100, best_loss_val)) if checks_since_last_progress > max_checks_without_progress: print("Early stopping!") break