def test_forward_distribute_keys_v4(embedding_type):
    """Smoke-test hugectr.distribute_keys_v4 + fprop_v2.

    Runs one training forward pass, pulls a gradient through the
    bp_trigger variable (which fires the plugin's backprop), then runs
    one inference forward pass. Results are printed, not asserted.

    Args:
        embedding_type: embedding layout name forwarded to the plugin
            (e.g. 'distributed' / 'localized' — confirm against plugin docs).
    """
    with tf.GradientTape() as tape:
        with tf.device("/gpu:0"):
            vocab_size, n_slots, vec_size = 8, 3, 4

            # Deterministic initial table: values 1..vocab*vec, row-major.
            init_value = np.float32(
                list(range(1, vocab_size * vec_size + 1))
            ).reshape(vocab_size, vec_size)

            # NOTE: 'visiable_gpus' spelling is the plugin's own kwarg name.
            hugectr.init(visiable_gpus=[0, 1, 3, 4],
                         seed=123,
                         key_type='int64',
                         value_type='float',
                         batch_size=4,
                         batch_size_eval=4)

            embedding_name = hugectr.create_embedding(
                init_value=init_value,
                opt_hparams=[1.0, 0.9, 0.99, 1e-3],
                name_='test_embedding',
                max_vocabulary_size_per_gpu=1737710,
                slot_num=n_slots,
                embedding_vec_size=vec_size,
                max_feature_num=4,
                embedding_type=embedding_type,
                max_nnz=2)

            # [batch=4, slot=3, max_nnz=2]; -1 marks an empty key position.
            keys = np.array(
                [[[0, -1], [1, -1], [2, 6]],
                 [[0, -1], [1, -1], [-1, -1]],
                 [[0, -1], [1, -1], [6, -1]],
                 [[0, -1], [1, -1], [2, -1]]],
                dtype=np.int64)

            # Computed for parity with the sparse-input path; unused below.
            sparse_indices = tf.where(keys != -1)  # [N, ndims]
            values = tf.gather_nd(keys, sparse_indices)  # [N]

            row_offsets, value_tensors, nnz_array = hugectr.distribute_keys_v4(
                all_keys=keys,
                gpu_count=4,
                embedding_type=embedding_type,
                max_nnz=2,
                batch_size=4,
                slot_num=3)
            print("row_offsets = ", row_offsets, "\n")
            print("value_tensors = ", value_tensors, "\n")
            print("nnz_array = ", nnz_array, "\n")

            # must be trainable
            bp_trigger = tf.Variable(
                initial_value=[1.0, 2.0],
                trainable=True,
                dtype=tf.float32,
                name='embedding_plugin_bprop_trigger')

            # Training-mode forward pass.
            forward_result = hugectr.fprop_v2(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=True,
                bp_trigger=bp_trigger,
                output_shape=[4, n_slots, vec_size])
            print("first step: \n", forward_result)

            # Gradient w.r.t. the trigger drives the plugin's update step.
            grads = tape.gradient(forward_result, bp_trigger)

            # Inference-mode forward pass after the update.
            forward_result = hugectr.fprop_v2(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=False,
                bp_trigger=bp_trigger,
                output_shape=[4, n_slots, vec_size])
            print("second step: \n", forward_result)
def tf_distribute_keys_fprop_v3(embedding_type):
    """Smoke-test the TF-side key-distribution helper + hugectr.fprop_v3.

    Calls the sibling `_distribute_kyes` helper twice (the repeat shows
    the tf.function retracing/caching behavior in the printed output),
    then runs a training forward pass, a gradient pull on bp_trigger,
    and an inference forward pass. Results are printed, not asserted.

    Args:
        embedding_type: embedding layout name forwarded to the plugin.
    """
    with tf.GradientTape() as tape:
        with tf.device("/gpu:0"):
            vocab_size, n_slots, vec_size = 8, 3, 4

            # Deterministic initial table: values 1..vocab*vec, row-major.
            init_value = np.float32(
                list(range(1, vocab_size * vec_size + 1))
            ).reshape(vocab_size, vec_size)

            # NOTE: 'visiable_gpus' spelling is the plugin's own kwarg name.
            hugectr.init(visiable_gpus=[0, 1, 3, 4],
                         seed=123,
                         key_type='int64',
                         value_type='float',
                         batch_size=4,
                         batch_size_eval=4)

            embedding_name = hugectr.create_embedding(
                init_value=init_value,
                opt_hparams=[1.0, 0.9, 0.99, 1e-3],
                name_='test_embedding',
                max_vocabulary_size_per_gpu=1737710,
                slot_num=n_slots,
                embedding_vec_size=vec_size,
                max_feature_num=4,
                embedding_type=embedding_type,
                max_nnz=2)

            # [batch=4, slot=3, max_nnz=2]; -1 marks an empty key position.
            keys = np.array(
                [[[0, -1], [1, -1], [2, 6]],
                 [[0, -1], [1, -1], [-1, -1]],
                 [[0, -1], [1, -1], [6, -1]],
                 [[0, -1], [1, -1], [2, -1]]],
                dtype=np.int64)

            # First call (may trace); `_distribute_kyes` is the sibling
            # helper's actual (misspelled) name.
            row_offsets, value_tensors, nnz_array = _distribute_kyes(
                tf.convert_to_tensor(keys),
                gpu_count=4,
                embedding_type=embedding_type)
            print("row_ptrs", row_offsets)
            print("\nvalues", value_tensors)
            print("\n", nnz_array)

            # Second call with identical inputs — should hit the cache.
            row_offsets, value_tensors, nnz_array = _distribute_kyes(
                tf.convert_to_tensor(keys),
                gpu_count=4,
                embedding_type=embedding_type)
            print("\nrow_ptrs", row_offsets)
            print("\nvalues", value_tensors)
            print("\n", nnz_array)

            # must be trainable
            bp_trigger = tf.Variable(
                initial_value=[1.0, 2.0],
                trainable=True,
                dtype=tf.float32,
                name='embedding_plugin_bprop_trigger')

            # Training-mode forward pass.
            forward_result = hugectr.fprop_v3(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=True,
                bp_trigger=bp_trigger,
                output_shape=[4, n_slots, vec_size])
            print("first step: \n", forward_result)

            # Gradient w.r.t. the trigger drives the plugin's update step.
            grads = tape.gradient(forward_result, bp_trigger)

            # Inference-mode forward pass after the update.
            forward_result = hugectr.fprop_v3(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=False,
                bp_trigger=bp_trigger,
                output_shape=[4, n_slots, vec_size])
            print("second step: \n", forward_result)
def test():
    """Smoke-test the original sparse-input path (hugectr.fprop).

    Builds COO-style (indices, values) inputs from a dense key tensor via
    tf.where / tf.gather_nd, then runs one training forward pass, a
    gradient pull on bp_trigger, and one inference forward pass with a
    'localized' embedding. Results are printed, not asserted.
    """
    with tf.GradientTape() as tape:
        with tf.device("/gpu:0"):
            vocab_size, n_slots, vec_size = 8, 3, 4

            # Deterministic initial table: values 1..vocab*vec, row-major.
            init_value = np.float32(
                list(range(1, vocab_size * vec_size + 1))
            ).reshape(vocab_size, vec_size)

            # NOTE: 'visiable_gpus' spelling is the plugin's own kwarg name.
            hugectr.init(visiable_gpus=[0, 1, 3, 4],
                         seed=123,
                         key_type='uint32',
                         value_type='float',
                         batch_size=4,
                         batch_size_eval=4)

            embedding_name = hugectr.create_embedding(
                init_value=init_value,
                opt_hparams=[0.1, 0.9, 0.99, 1e-3],
                name_='test_embedding',
                max_vocabulary_size_per_gpu=5,
                slot_num=n_slots,
                embedding_vec_size=vec_size,
                max_feature_num=4,
                embedding_type='localized',
                max_nnz=2)

            # [batch=4, slot=3, max_feature_num=4]; -1 marks an empty slot.
            keys = np.array(
                [[[0, -1, -1, -1], [1, -1, -1, -1], [2, 6, -1, -1]],
                 [[0, -1, -1, -1], [1, -1, -1, -1], [-1, -1, -1, -1]],
                 [[0, -1, -1, -1], [1, -1, -1, -1], [6, -1, -1, -1]],
                 [[0, -1, -1, -1], [1, -1, -1, -1], [2, -1, -1, -1]]],
                dtype=np.int64)

            # Sparse view of the valid (non -1) keys.
            sparse_indices = tf.where(keys != -1)  # [N, ndims]
            values = tf.gather_nd(keys, sparse_indices)  # [N]

            # must be trainable
            bp_trigger = tf.Variable(
                initial_value=[1.0, 2.0],
                trainable=True,
                dtype=tf.float32,
                name='embedding_plugin_bprop_trigger')

            # Training-mode forward pass.
            forward_result = hugectr.fprop(
                embedding_name=embedding_name,
                sparse_indices=sparse_indices,
                values=values,
                dense_shape=keys.shape,
                output_type=tf.float32,
                is_training=True,
                bp_trigger=bp_trigger)
            print("first step: \n", forward_result)

            # Gradient w.r.t. the trigger drives the plugin's update step.
            grads = tape.gradient(forward_result, bp_trigger)

            # Inference-mode forward pass after the update.
            forward_result = hugectr.fprop(
                embedding_name=embedding_name,
                sparse_indices=sparse_indices,
                values=values,
                dense_shape=keys.shape,
                output_type=tf.float32,
                is_training=False,
                bp_trigger=bp_trigger)
            print("second step: \n", forward_result)