def test_fusedbatchnorm_nchw():
    # Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        feed_dict = {in_0: k_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: ng_model with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        feed_dict = {in_0: k_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    # Transpose the TF output from NHWC to NCHW for comparison with the
    # ngraph output.
    result1_bool = np.allclose(
        np.transpose(tf_outval[0], (0, 3, 1, 2)),
        ng_outval[0],
        rtol=0,
        atol=1e-02)
    # These TF outputs need no transpose since they are one-dimensional.
    result2_bool = np.allclose(tf_outval[1], ng_outval[1], rtol=0, atol=1e-02)
    result3_bool = np.allclose(tf_outval[2], ng_outval[2], rtol=0, atol=1e-02)

    assert result1_bool and result2_bool and result3_bool
def with_ngraph(self, l, config=tf.ConfigProto()):
    if ngraph_bridge.is_grappler_enabled():
        rewrite_options = rewriter_config_pb2.RewriterConfig(
            meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE,
            min_graph_nodes=-1,
            custom_optimizers=[
                rewriter_config_pb2.RewriterConfig.CustomGraphOptimizer(
                    name="ngraph-optimizer")
            ])
        config = tf.ConfigProto(graph_options=tf.GraphOptions(
            rewrite_options=rewrite_options))

    ngraph_tf_disable_deassign_clusters = os.environ.pop(
        'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
    ngraph_bridge.enable()
    with tf.Session(config=config) as sess:
        retval = l(sess)

    os.environ.pop('NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    if ngraph_tf_disable_deassign_clusters is not None:
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = \
            ngraph_tf_disable_deassign_clusters
    return retval
def test_ng_serialize_to_json(self):
    for f in glob.glob("tf_function_ngraph_cluster*.json"):
        os.remove(f)
    initial_contents = set(os.listdir())
    xshape = (3, 4, 5)
    x = tf.compat.v1.placeholder(tf.float32, shape=xshape)
    out = tf.nn.l2_loss(tf.abs(x))
    values = np.random.rand(*xshape)

    config = ngraph_bridge.update_config(tf.compat.v1.ConfigProto())
    ngraph_enable_serialize = os.environ.pop('NGRAPH_ENABLE_SERIALIZE', None)
    os.environ['NGRAPH_ENABLE_SERIALIZE'] = '1'
    ngraph_bridge.enable()
    with tf.compat.v1.Session(config=config) as sess:
        out = sess.run(out, feed_dict={x: values})
    os.environ.pop('NGRAPH_ENABLE_SERIALIZE', None)
    if ngraph_enable_serialize is not None:
        os.environ['NGRAPH_ENABLE_SERIALIZE'] = \
            ngraph_enable_serialize

    final_contents = set(os.listdir())
    assert len(final_contents) - len(initial_contents) == 1
    new_files = final_contents.difference(initial_contents)
    flname = new_files.pop()
    assert flname.startswith('tf_function_') and flname.endswith('json')
    os.remove(flname)
def test_set_backend(self):
    # Store env variables: when testing on backends like GPU, the tests are
    # run with NGRAPH_TF_BACKEND set. By storing and restoring the env
    # variables, we run the tests independent of the backend set.
    # Currently we store and restore only NGRAPH_TF_BACKEND.
    env_var_map = self.store_env_variables()

    # test
    ngraph_bridge.enable()
    backend_cpu = 'CPU'
    backend_interpreter = 'INTERPRETER'

    found_cpu = False
    found_interpreter = False
    # These will only print when running pytest with flag "-s"
    print("Number of supported backends ", ngraph_bridge.backends_len())
    supported_backends = ngraph_bridge.list_backends()
    print(" ****** Supported Backends ****** ")
    for backend_name in supported_backends:
        print(backend_name)
        if backend_name == backend_cpu:
            found_cpu = True
        if backend_name == backend_interpreter:
            found_interpreter = True
    print(" ******************************** ")
    assert found_cpu and found_interpreter

    # Create Graph
    val = tf.placeholder(tf.float32)
    out1 = tf.abs(val)
    out2 = tf.abs(out1)

    # set INTERPRETER backend
    assert ngraph_bridge.is_supported_backend(backend_interpreter)
    ngraph_bridge.set_backend(backend_interpreter)
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_interpreter

    # Create a new session to execute the graph. To re-confirm which backend
    # the graph was executed on, currently the only way is to enable
    # NGRAPH_TF_VLOG_LEVEL=5.
    with tf.Session() as sess:
        sess.run((out2,), feed_dict={val: (1.4, -0.5, -1)})
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_interpreter

    # set CPU backend
    assert ngraph_bridge.is_supported_backend(backend_cpu)
    ngraph_bridge.set_backend(backend_cpu)
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_cpu
    # create new session to execute graph
    with tf.Session() as sess:
        sess.run((out2,), feed_dict={val: (1.4, -0.5, -1)})
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_cpu

    # restore env variables
    self.restore_env_variables(env_var_map)
def on_ngraph_change(self, change):
    if change['type'] == 'change' and change['name'] == 'value':
        i = self.ngraph_backends.index(change['new'])
        if self.ngraph_backends[i] == 'DISABLED':
            self.use_ngraph = False
            ngraph_bridge.disable()
        else:
            self.use_ngraph = True
            ngraph_bridge.enable()
            ngraph_bridge.set_backend(self.ngraph_backends[i])
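A minimal wiring sketch for the handler above, assuming the surrounding class exposes self.ngraph_backends and drives an ipywidgets UI (as the imports in this collection suggest); the widget name backend_dropdown is hypothetical:

# Hypothetical hookup (inside the demo class __init__): ipywidgets delivers
# change dicts of the form {'type': 'change', 'name': 'value', 'new': <sel>},
# which is exactly the shape on_ngraph_change inspects.
backend_dropdown = widgets.Dropdown(
    options=self.ngraph_backends, description='Backend')
backend_dropdown.observe(self.on_ngraph_change, names='value')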
def set_os_env(select_device):
    if select_device == 'CPU':
        # run on TF only
        ngraph_bridge.disable()
    else:
        if not ngraph_bridge.is_enabled():
            ngraph_bridge.enable()
        assert select_device[:7] == "NGRAPH_", \
            "Expecting device name to start with NGRAPH_"
        back_end = select_device.split("NGRAPH_")
        os.environ['NGRAPH_TF_BACKEND'] = back_end[1]
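A usage sketch, assuming device names follow the convention the assert above enforces ("CPU" for TF-native, "NGRAPH_<backend>" otherwise); the specific calls are illustrative:

# Hypothetical calls: "NGRAPH_CPU" enables the bridge and sets
# NGRAPH_TF_BACKEND=CPU; plain "CPU" disables the bridge entirely.
set_os_env('NGRAPH_CPU')
set_os_env('CPU')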
def test_set_backend():
    ngraph_bridge.enable()
    backend_cpu = 'CPU'
    backend_interpreter = 'INTERPRETER'

    found_cpu = False
    found_interpreter = False
    # These will only print when running pytest with flag "-s"
    print("Number of supported backends ", ngraph_bridge.backends_len())
    supported_backends = ngraph_bridge.list_backends()
    print(" ****** Supported Backends ****** ")
    for backend_name in supported_backends:
        print(backend_name)
        if backend_name == backend_cpu:
            found_cpu = True
        if backend_name == backend_interpreter:
            found_interpreter = True
    print(" ******************************** ")
    assert found_cpu and found_interpreter

    # Create Graph
    val = tf.placeholder(tf.float32)
    out1 = tf.abs(val)
    out2 = tf.abs(out1)

    # set INTERPRETER backend
    assert ngraph_bridge.is_supported_backend(backend_interpreter)
    ngraph_bridge.set_backend(backend_interpreter)
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_interpreter

    # Create a new session to execute the graph. To re-confirm which backend
    # the graph was executed on, currently the only way is to enable
    # NGRAPH_TF_VLOG_LEVEL=5.
    with tf.Session() as sess:
        sess.run((out2,), feed_dict={val: (1.4, -0.5, -1)})
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_interpreter

    # set CPU backend
    assert ngraph_bridge.is_supported_backend(backend_cpu)
    ngraph_bridge.set_backend(backend_cpu)
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_cpu
    # create new session to execute graph
    with tf.Session() as sess:
        sess.run((out2,), feed_dict={val: (1.4, -0.5, -1)})
    currently_set_backend = ngraph_bridge.get_currently_set_backend_name()
    assert currently_set_backend == backend_cpu
def with_ngraph(self, l, config=tf.ConfigProto()):
    ngraph_tf_disable_deassign_clusters = os.environ.pop(
        'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
    ngraph_bridge.enable()
    with tf.Session(config=config) as sess:
        retval = l(sess)

    os.environ.pop('NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    if ngraph_tf_disable_deassign_clusters is not None:
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = \
            ngraph_tf_disable_deassign_clusters
    return retval
def with_ngraph(self, l, config=tf.ConfigProto()):
    # TODO: Stop grappler on failure (Add fail_on_optimizer_errors=True)
    config = ngraph_bridge.update_config(config)

    ngraph_tf_disable_deassign_clusters = os.environ.pop(
        'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
    ngraph_bridge.enable()
    with tf.Session(config=config) as sess:
        retval = l(sess)

    os.environ.pop('NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    if ngraph_tf_disable_deassign_clusters is not None:
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = \
            ngraph_tf_disable_deassign_clusters
    return retval
def test_conv2dbackpropinput_nhwc(padding):
    np_filter = np.random.rand(*filter_size_hwio).astype('f')
    n_np_out = np.random.rand(*out_backprop_in_sizes[padding]).astype('f')

    # Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, filter_size, out_backprop = tf_model(padding)
        feed_dict = {filter_size: np_filter, out_backprop: n_np_out}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: ng_model with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, filter_size, out_backprop = ng_model(padding)
        feed_dict = {filter_size: np_filter, out_backprop: n_np_out}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
def test_maxpoolbackprop_nhwc(padding):
    g_np = grad_nhwc[padding]
    o_np = output_nhwc[padding]

    # Test 1: tf_model TF-native
    with tf.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, orig_in, orig_out, grad = tf_model(padding)
        feed_dict = {orig_in: i_np, orig_out: o_np, grad: g_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: ng_model with ngraph, NNP backend
    with tf.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, orig_in, orig_out, grad = ng_model(padding)
        feed_dict = {orig_in: i_np, orig_out: o_np, grad: g_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
def test_conv2d():
    # Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, input_data = tf_model()
        feed_dict = {input_data: t_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: ng_model with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data = ng_model()
        feed_dict = {input_data: n_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    # Transpose the TF output from NHWC to NCHW before comparing.
    assert np.allclose(
        np.transpose(tf_outval, (0, 3, 1, 2)), ng_outval, rtol=0, atol=1e-02)
def test_fusedbatchnorm_nhwc():
    # Test 1: tf_model TF-native
    with tf.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        feed_dict = {in_0: k_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: ng_model with ngraph, NNP backend
    with tf.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        feed_dict = {in_0: k_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    # Both models run in NHWC, so the outputs compare directly.
    result1_bool = np.allclose(tf_outval[0], ng_outval[0], rtol=0, atol=1e-02)
    result2_bool = np.allclose(tf_outval[1], ng_outval[1], rtol=0, atol=1e-02)
    result3_bool = np.allclose(tf_outval[2], ng_outval[2], rtol=0, atol=1e-02)

    assert result1_bool and result2_bool and result3_bool
def setup_ngraph_bridge(self, backend):
    # Environment variables
    os.environ['PLAIDML_USE_STRIPE'] = '1'
    if self.workers < 1:
        # Environment variable values must be strings, not ints.
        os.environ['OMP_NUM_THREADS'] = '1'
    else:
        # Use default
        if os.getenv('OMP_NUM_THREADS') is not None:
            del os.environ['OMP_NUM_THREADS']

    import ngraph_bridge
    if backend == 'DISABLED' or backend == 'TF':
        ngraph_bridge.disable()
    elif backend == 'CPU':
        ngraph_bridge.set_backend('CPU')
        ngraph_bridge.enable()
    elif backend == 'PLAIDML':
        ngraph_bridge.set_backend('PLAIDML')
        ngraph_bridge.enable()
    else:
        print("ERROR: Unsupported backend " + backend + " selected.")
def with_ngraph(self, l, config=None):
    # Config defaults to None and is initialized inside, because mutable
    # objects should not be used as default arguments in Python.
    if config is None:
        config = tf.compat.v1.ConfigProto()
    # TODO: Stop grappler on failure (Add fail_on_optimizer_errors=True)
    config = ngraph_bridge.update_config(config)

    ngraph_tf_disable_deassign_clusters = os.environ.pop(
        'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
    ngraph_bridge.enable()
    with tf.compat.v1.Session(config=config) as sess:
        retval = l(sess)

    os.environ.pop('NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    if ngraph_tf_disable_deassign_clusters is not None:
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = \
            ngraph_tf_disable_deassign_clusters
    return retval
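A usage sketch for the helper above: with_ngraph builds the nGraph-enabled session and hands it to the callable, returning whatever the callable returns. The test body below is hypothetical:

# Hypothetical test using the with_ngraph harness.
def test_abs(self):
    x = tf.compat.v1.placeholder(tf.float32, shape=(3,))

    def run_test(sess):
        return sess.run(tf.abs(x), feed_dict={x: (1.4, -0.5, -1)})

    assert np.allclose(self.with_ngraph(run_test), (1.4, 0.5, 1.0))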
def test_conv2dbackpropfilter_nchw(padding):
    n_np_inp = np.random.rand(*input_sizes_nchw).astype('f')
    n_np_out = np.random.rand(*out_backprop_in_sizes[padding]).astype('f')

    # Reshape to NHWC for TF
    t_np_inp = np.transpose(n_np_inp, (0, 2, 3, 1))
    t_np_out = np.transpose(n_np_out, (0, 2, 3, 1))

    # Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, input_data, out_backprop = tf_model(padding)
        feed_dict = {input_data: t_np_inp, out_backprop: t_np_out}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: ng_model with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data, out_backprop = ng_model(padding)
        feed_dict = {input_data: n_np_inp, out_backprop: n_np_out}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
def test_enable(self):
    ngraph_bridge.enable()
    assert ngraph_bridge.is_enabled() == 1
def run(self):
    """Run benchmark with optimized graph."""
    print("Run inference with dummy data")

    config = tf.compat.v1.ConfigProto()
    config.intra_op_parallelism_threads = self.args.num_intra_threads
    config.inter_op_parallelism_threads = self.args.num_inter_threads
    config.use_per_session_threads = True

    data_graph = tf.Graph()
    with data_graph.as_default():
        input_shape = [
            self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3
        ]
        images = tf.random.uniform(
            input_shape,
            0.0,
            255.0,
            dtype=tf.float32,
            seed=42,
            name='synthetic_images')

    infer_graph = tf.Graph()
    with infer_graph.as_default():
        graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(self.args.input_graph, 'rb') as input_file:
            input_graph_content = input_file.read()
            graph_def.ParseFromString(input_graph_content)
        print("Optimizing graph %s for inference..." % self.args.input_graph)
        output_graph = optimize_for_inference(graph_def, [INPUTS], [OUTPUTS],
                                              dtypes.float32.as_datatype_enum,
                                              False)
        tf.import_graph_def(output_graph, name='')

    input_tensor = infer_graph.get_tensor_by_name('input_tensor:0')
    output_tensor = infer_graph.get_tensor_by_name('softmax_tensor:0')

    # Run without nGraph first
    print("Run inference (without nGraph)")
    ngraph_bridge.disable()
    data_sess = tf.compat.v1.Session(graph=data_graph, config=config)
    infer_sess = tf.compat.v1.Session(graph=infer_graph, config=config)

    iteration = 0
    num_processed_images = 0
    num_remaining_images = self.args.num_images
    tf_time = 0.0
    tf_labels = np.array([], dtype=np.int32)
    while num_remaining_images >= self.args.batch_size:
        np_images = data_sess.run(images)
        if iteration > self.args.warmup_iters:
            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size

        tf_start_time = time.time()
        predictions = infer_sess.run(output_tensor, {input_tensor: np_images})
        tf_elapsed_time = time.time() - tf_start_time

        if iteration > self.args.warmup_iters:
            tf_time += tf_elapsed_time
            tf_labels = np.append(tf_labels, np.argmax(predictions, axis=-1))
        iteration += 1
    print("Total execution time (TF): ", tf_time)

    # Run with nGraph now
    print("Run inference (with nGraph)")
    ngraph_bridge.enable()
    data_sess = tf.compat.v1.Session(graph=data_graph, config=config)
    infer_sess = tf.compat.v1.Session(graph=infer_graph, config=config)

    iteration = 0
    num_processed_images = 0
    num_remaining_images = self.args.num_images
    ngtf_time = 0.0
    ngtf_labels = np.array([], dtype=np.int32)
    while num_remaining_images >= self.args.batch_size:
        np_images = data_sess.run(images)
        if iteration > self.args.warmup_iters:
            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size

        ngtf_start_time = time.time()
        predictions = infer_sess.run(output_tensor, {input_tensor: np_images})
        ngtf_elapsed_time = time.time() - ngtf_start_time

        if iteration > self.args.warmup_iters:
            ngtf_time += ngtf_elapsed_time
            ngtf_labels = np.append(ngtf_labels,
                                    np.argmax(predictions, axis=-1))
        iteration += 1
    print("Total execution time (NGTF): ", ngtf_time)

    print("Processed %d images. Batch size = %d" %
          (num_processed_images, self.args.batch_size))
    print("Avg throughput (TF): %0.4f img/s" %
          (num_processed_images / tf_time))
    print("Avg throughput (NGTF): %0.4f img/s" %
          (num_processed_images / ngtf_time))

    # Both runs must produce identical labels.
    assert (tf_labels == ngtf_labels).all()
import warnings
warnings.simplefilter('ignore')

import ngraph_bridge
ngraph_bridge.enable()

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import random

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.python.keras import optimizers
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

import ipywidgets as widgets
from IPython.display import clear_output


class ProgressBar(tf.keras.callbacks.Callback):

    def __init__(self, demo):
        self.demo = demo
        self.epoch = 0