def test_while_loop_with_ta_read_and_write(self):
    i = tf_placeholder(tf.int32, (), name="input_1")
    inputs = tf_placeholder(tf.float32, (10,), name="input_2")

    inputs_2 = tf.identity(inputs)
    input_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True).unstack(inputs_2)
    output_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

    c = lambda i, *_: tf.logical_and(tf.less(i, 10), i >= 0)

    def b(i, out_ta):
        new_i = tf.add(i, 1)
        x = input_ta.read(i)
        x = x + 3
        out_ta_new = out_ta.write(i, x)
        return new_i, out_ta_new

    i_final, out_final = tf.while_loop(c, b, [i, output_ta])
    _ = tf.identity(i_final, name="i")
    _ = tf.identity(out_final.stack(), name="output_ta")
    input_names_with_port = ["input_1:0", "input_2:0"]
    output_names_with_port = ["i:0", "output_ta:0"]

    self._run_test_case(input_names_with_port, output_names_with_port)
def test_map_fn(self):
    def fn0(elem):
        res = elem + elem * elem
        return res

    def fn1(elem):
        res = elem[0] * elem[1] + elem[0]
        return res

    x_val = 100 * np.random.random_sample([2, 10]).astype(np.float32)
    y_val = 100 * np.random.random_sample([2, 10]).astype(np.float32)

    # test fn0
    x = tf_placeholder(tf.float32, shape=x_val.shape, name="input_0")
    x_ = tf.identity(x)
    res_ = tf.map_fn(fn0, x_, dtype=tf.float32)
    _ = tf.identity(res_, name="output_0")
    input_names_with_port = ["input_0:0"]
    output_names_with_port = ["output_0:0"]
    self._run_test_case(input_names_with_port, output_names_with_port)
    tf_reset_default_graph()

    # test fn1
    x = tf_placeholder(tf.float32, shape=x_val.shape, name="input_0")
    y = tf_placeholder(tf.float32, shape=y_val.shape, name="input_1")
    x_ = tf.identity(x)
    y_ = tf.identity(y)
    res_ = tf.map_fn(fn1, (x_, y_), dtype=tf.float32)
    _ = tf.identity(res_, name="output_0")
    input_names_with_port = ["input_0:0", "input_1:0"]
    output_names_with_port = ["output_0:0"]
    self._run_test_case(input_names_with_port, output_names_with_port)
def test_while_loop_in_cond(self):
    x_val = np.array([1, 2, 3], dtype=np.float32)
    y_val = np.array([4, 5, 6], dtype=np.float32)
    x = tf_placeholder(tf.float32, x_val.shape, name="input_1")
    y = tf_placeholder(tf.float32, y_val.shape, name="input_2")

    def cond_graph():
        b = tf.constant(np.array([0], dtype=np.int32), dtype=tf.int32)
        # while_loop
        c = lambda y: tf.reduce_any(tf.less(y, 10))
        b = lambda i: tf.add(y, 1)
        return tf.while_loop(c, b, [y])

    res = tf.cond(x[0] < y[0], lambda: x, cond_graph, name="test_cond")
    _ = tf.identity(res, name="output")
    input_names_with_port = ["input_1:0", "input_2:0"]
    output_names_with_port = ["output:0"]
    self._run_test_case(input_names_with_port, output_names_with_port)
def test_bidrectional_attention_wrapper_lstm_encoder(self):
    size = 30
    time_step = 3
    input_size = 4
    attn_size = size
    batch_size = 9

    # shape [batch size, time step, size]
    # attention_state: usually the output of an RNN encoder.
    # This tensor should be shaped `[batch_size, max_time, ...]`
    encoder_time_step = time_step
    encoder_x_val = np.random.randn(encoder_time_step, input_size).astype('f')
    encoder_x_val = np.stack([encoder_x_val] * batch_size)
    encoder_x = tf_placeholder(tf.float32, encoder_x_val.shape, name="input_1")
    encoder_cell = tf.nn.rnn_cell.LSTMCell(size)
    attention_states, _ = tf.nn.dynamic_rnn(encoder_cell, encoder_x, dtype=tf.float32)
    # [9, 3, 30], [9, 30]
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(attn_size, attention_states)

    match_input_fn = lambda curr_input, state: tf.concat([curr_input, state], axis=-1)
    cell = tf.nn.rnn_cell.LSTMCell(size)
    match_cell_fw = tf.contrib.seq2seq.AttentionWrapper(cell,
                                                        attention_mechanism,
                                                        attention_layer_size=attn_size,
                                                        cell_input_fn=match_input_fn,
                                                        output_attention=False)
    match_cell_bk = tf.contrib.seq2seq.AttentionWrapper(cell,
                                                        attention_mechanism,
                                                        attention_layer_size=attn_size,
                                                        cell_input_fn=match_input_fn,
                                                        output_attention=False)

    decoder_time_step = 6
    decoder_x_val = np.random.randn(decoder_time_step, batch_size, input_size).astype('f')

    decoder_x = tf_placeholder(tf.float32, decoder_x_val.shape, name="input_2")
    seq_length = tf_placeholder(tf.int32, (batch_size), name="input_3")
    (match_output_fw, match_output_bk), (match_state_fw, match_state_bk) = \
        tf.nn.bidirectional_dynamic_rnn(cell_fw=match_cell_fw,
                                        cell_bw=match_cell_bk,
                                        inputs=decoder_x,
                                        sequence_length=tf.identity(seq_length),
                                        dtype=tf.float32,
                                        time_major=True)

    matched_output = tf.concat([match_output_fw, match_output_bk], axis=-1)
    matched_state = tf.concat([match_state_fw.cell_state, match_state_bk.cell_state], -1)

    _ = tf.identity(matched_output, name="output_0")
    _ = tf.identity(matched_state, name="final_state")

    input_names_with_port = ["input_1:0", "input_2:0", "input_3:0"]
    output_names_with_port = ["output_0:0", "final_state:0"]
    self._run_test_case(input_names_with_port, output_names_with_port)
def compute_const_folding_using_tf(g, const_node_values, graph_outputs):
    """Find nodes with constant inputs and compute their values using TF"""
    if const_node_values is None:
        const_node_values = {}
    graph_outputs = set(graph_outputs)
    from tf2onnxnightly.tf_loader import tf_session, tf_placeholder  # pylint: disable=import-outside-toplevel

    ops = g.get_operations()
    outputs_to_values = {}
    outputs_to_dtypes = {}
    outputs_to_shapes = {}
    shape_node_outputs = {}

    def is_small_shape(x):
        return np.product(x) <= 1000

    def is_huge_shape(x):
        return np.product(x) >= 1000000

    for node in ops:
        # Load values of constants. Use const_node_values if possible
        if node.type in ["Const", "ConstV2"]:
            tensor = node.node_def.attr["value"].tensor
            if node.name in const_node_values:
                tensor.tensor_content = const_node_values[node.name]
            outputs_to_values[node.outputs[0].name] = get_tf_tensor_data(tensor)
            outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype
        for out in node.outputs:
            outputs_to_shapes[out.name] = get_tf_tensor_shape(out)

    for node in ops:
        if node.type == "Shape":
            shape = outputs_to_shapes.get(node.inputs[0].name)
            if shape is not None:
                shape_node_outputs[node.outputs[0].name] = shape

    unneeded_outputs = set()
    progress = True
    while progress:
        progress = False
        for node in ops:
            # Find ops with constant inputs and compute their values
            input_names = [i.name for i in node.inputs]
            output_names = [i.name for i in node.outputs]
            if node.type == 'StridedSlice' and input_names[0] in shape_node_outputs \
                    and output_names[0] not in outputs_to_values:
                shape = shape_node_outputs[input_names[0]]
                i = get_index_from_strided_slice_of_shape(node, outputs_to_values)
                if i is not None and 0 <= i < len(shape) and shape[i] is not None:
                    np_dtype = map_onnx_to_numpy_type(map_tf_dtype(node.outputs[0].dtype))
                    outputs_to_values[output_names[0]] = np.array(shape[i], dtype=np_dtype)
                    outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype
                    progress = True
            can_fold = node.type not in ['Enter', 'Placeholder', 'PlaceholderWithDefault']
            can_fold = can_fold and not node.type.startswith('Random')
            can_fold = can_fold and len(input_names) > 0 and all(inp in outputs_to_values for inp in input_names)
            # We can only fold nodes with a single output
            can_fold = can_fold and len(output_names) == 1 and output_names[0] not in outputs_to_values
            # Skip if value already computed, used, and discarded
            can_fold = can_fold and output_names[0] not in unneeded_outputs and output_names[0] not in graph_outputs
            if can_fold:
                # Make a mini graph containing just the node to fold
                g2 = tf.Graph()
                with g2.as_default():
                    for inp in input_names:
                        tf_placeholder(outputs_to_dtypes[inp], name=inp.split(':')[0])
                    mini_graph_def = g2.as_graph_def()
                    mini_graph_def.node.append(node.node_def)
                g3 = tf.Graph()
                with g3.as_default():
                    feed_dict = {}
                    inp_shapes = []
                    for inp in input_names:
                        inp_np = outputs_to_values[inp]
                        feed_dict[inp] = inp_np
                        inp_shapes.append(inp_np.shape)
                    try:
                        with tf_session() as sess:
                            tf.import_graph_def(mini_graph_def, name='')
                            results = sess.run(output_names, feed_dict=feed_dict)
                            if is_huge_shape(results[0].shape) and all(is_small_shape(inp) for inp in inp_shapes):
                                logger.debug("Skipping folding of node %s since result shape %s is much larger "
                                             "than input shapes %s", node.name, results[0].shape, inp_shapes)
                            else:
                                outputs_to_values[output_names[0]] = results[0]
                                outputs_to_dtypes[output_names[0]] = node.outputs[0].dtype
                                progress = True
                    except Exception:  # pylint: disable=broad-except
                        logger.debug("Could not fold node %s", node.name)
    unneeded_outputs.update(outputs_to_values.keys())
    for node in ops:
        # Mark values we need to keep
        input_names = [i.name for i in node.inputs]
        output_names = [i.name for i in node.outputs]
        if len(output_names) == 1 and output_names[0] in outputs_to_values:
            continue
        for i in input_names:
            if i in unneeded_outputs:
                unneeded_outputs.remove(i)
    for node in unneeded_outputs:
        # Remove unneeded values to prevent memory usage explosion
        if node in outputs_to_values:
            del outputs_to_values[node]
            del outputs_to_dtypes[node]

    for node in ops:
        # We don't need the constants any more
        if node.type in ["Const", "ConstV2"] and node.outputs[0].name in outputs_to_values:
            del outputs_to_values[node.outputs[0].name]
            del outputs_to_dtypes[node.outputs[0].name]

    logger.info("Computed %d values for constant folding", len(outputs_to_values))
    return outputs_to_values, outputs_to_dtypes
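
# A minimal usage sketch (not part of the library): roughly how
# compute_const_folding_using_tf might be driven on a tiny graph. The graph,
# tensor names, and helper name below are made up for illustration.
def _example_const_folding_usage():
    """Build a toy graph and fold its constant subgraph with TF."""
    with tf.Graph().as_default() as g:
        a = tf.constant([1.0, 2.0], name="a")
        b = tf.constant([3.0, 4.0], name="b")
        s = tf.add(a, b, name="add")
        _ = tf.identity(s, name="out")
    # "add:0" has only constant inputs, so its value can be computed with TF;
    # "out:0" is a graph output and is deliberately left unfolded.
    values, dtypes = compute_const_folding_using_tf(g, None, ["out:0"])
    return values, dtypes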
def freeze_and_run_tf(self, func, feed_dict, outputs, as_session, premade_placeholders, large_model, constant_fold):
    np.random.seed(1)  # Make it reproducible.
    clean_feed_dict = {utils.node_name(k): v for k, v in feed_dict.items()}
    if is_tf2() and not as_session:
        #
        # use eager to execute the tensorflow func
        #
        # numpy doesn't work for all ops, make it tf.Tensor()
        input_tensors = [tf.TensorSpec(shape=v.shape, dtype=tf.as_dtype(v.dtype), name=utils.node_name(k))
                         for k, v in feed_dict.items()]
        input_list = [tf.convert_to_tensor(v, dtype=tf.as_dtype(v.dtype), name=utils.node_name(k))
                      for k, v in feed_dict.items()]
        tf.random.set_seed(1)
        result = func(*input_list)
        if isinstance(result, (list, tuple)):
            # list or tuple
            result = [x.numpy() for x in result]
        else:
            # single result
            result = [result.numpy()]

        # now make the eager functions a graph
        concrete_func = tf.function(func, input_signature=tuple(input_tensors))
        concrete_func = concrete_func.get_concrete_function()
        graph_def = from_function(concrete_func,
                                  input_names=list(feed_dict.keys()),
                                  output_names=outputs,
                                  large_model=large_model)
        initialized_tables = None
    else:
        #
        # use graph to execute the tensorflow func
        #
        with tf_session() as sess:
            tf_set_random_seed(1)
            input_list = []
            if not premade_placeholders:
                for k, v in clean_feed_dict.items():
                    input_list.append(tf_placeholder(name=k, shape=v.shape, dtype=tf.as_dtype(v.dtype)))
            func(*input_list)
            variables_lib.global_variables_initializer().run()
            tf_tables_initializer().run()
            output_dict = []
            for out_name in outputs:
                output_dict.append(sess.graph.get_tensor_by_name(out_name))
            result = sess.run(output_dict, feed_dict=feed_dict)
            graph_def = freeze_session(sess, input_names=list(feed_dict.keys()), output_names=outputs)
            table_names, key_dtypes, value_dtypes = get_hash_table_info(graph_def)
            initialized_tables = {}
            for n, k_dtype, val_dtype in zip(table_names, key_dtypes, value_dtypes):
                h = lookup_ops.hash_table_v2(k_dtype, val_dtype, shared_name=n)
                k, v = lookup_ops.lookup_table_export_v2(h, k_dtype, val_dtype)
                initialized_tables[n] = (sess.run(k), sess.run(v))

        tf_reset_default_graph()
        with tf_session() as sess:
            tf.import_graph_def(graph_def, name='')
            graph_def = tf_optimize(list(feed_dict.keys()), outputs, graph_def, fold_constant=constant_fold)

    model_path = os.path.join(self.test_data_directory, self._testMethodName + "_after_tf_optimize.pb")
    utils.save_protobuf(model_path, graph_def)
    self.logger.debug("created file %s", model_path)
    return result, graph_def, initialized_tables
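
# A usage sketch (assumption, not taken from the test suite): how a test case
# might drive freeze_and_run_tf with a tiny eager function. The function body,
# the tensor name "input:0", and the output name "output:0" are hypothetical.
def _example_freeze_and_run_tf_usage(self):
    def toy_func(x):
        # double the input; the identity name becomes the frozen graph output
        return tf.identity(x * 2.0, name="output")

    feed = {"input:0": np.ones((2, 3), dtype=np.float32)}
    # eager path (as_session=False): run toy_func eagerly, then freeze it to a GraphDef
    result, graph_def, tables = self.freeze_and_run_tf(
        toy_func, feed, ["output:0"], as_session=False,
        premade_placeholders=False, large_model=False, constant_fold=True)
    return result, graph_def, tables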