def test_read_data(embedding_type, batch_size, display_steps, distribute_keys):
    cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
    feature_desc = dict()
    for col in cols:
        if col == 'label' or col.startswith("I"):
            feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
        else:
            feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]

    dataset_names = ["train.tfrecord"]
    dataset = create_dataset(dataset_names=dataset_names,
                             feature_desc=feature_desc,
                             batch_size=batch_size,
                             n_epochs=1,
                             distribute_keys=tf.constant(distribute_keys, dtype=tf.bool),
                             gpu_count=tf.constant(4, dtype=tf.int32),
                             embedding_type=tf.constant(embedding_type, dtype=tf.string))

    total_steps = 0
    total_begin_time = time.time()
    begin_time = total_begin_time
    for step, datas in enumerate(dataset):
        total_steps += 1
        a = datas  # consume the batch; only the reading throughput is measured
        if step % display_steps == 0 and step != 0:
            end_time = time.time()
            tf.print("Elapsed time: %.5f for %d steps." % (end_time - begin_time, display_steps))
            begin_time = time.time()

    total_end_time = time.time()
    total_elapsed_time = total_end_time - total_begin_time
    print("Total elapsed time: %.5f seconds for %d steps. Average elapsed time: %.5f / step."
          % (total_elapsed_time, total_steps, (total_elapsed_time / total_steps)))
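
# A minimal driver sketch for the benchmark above. The flag names and default
# values here are illustrative assumptions, not the repository's actual CLI;
# they simply mirror the parameters test_read_data() expects.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="profile create_dataset reading throughput")
    parser.add_argument("--embedding_type", type=str, default="distributed")
    parser.add_argument("--batch_size", type=int, default=65536)
    parser.add_argument("--display_steps", type=int, default=100)
    parser.add_argument("--distribute_keys", type=int, default=1,
                        help="non-zero: distribute keys to GPUs while reading")
    args = parser.parse_args()

    test_read_data(embedding_type=args.embedding_type,
                   batch_size=args.batch_size,
                   display_steps=args.display_steps,
                   distribute_keys=(args.distribute_keys != 0))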
def main(args):
    # ---------------- feature description for criteo dataset in tfrecord ---------- #
    cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
    feature_desc = dict()
    for col in cols:
        if col == 'label' or col.startswith("I"):
            feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
        else:
            feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]

    # -------------- create dataset pipeline --------------------------------------- #
    dataset_names = [args.data_path + "/train_0.tfrecord"]
    dataset = create_dataset(dataset_names=dataset_names,
                             feature_desc=feature_desc,
                             batch_size=args.batch_size,
                             n_epochs=args.n_epochs,
                             distribute_keys=tf.constant(True, dtype=tf.bool),
                             gpu_count=len(args.gpus),
                             embedding_type=tf.constant('distributed', dtype=tf.string))

    # ----------- build model and optimizer ----------------------------------------- #
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

    model = DeepFM_PluginEmbedding(vocabulary_size=args.vocabulary_size,
                                   embedding_vec_size=args.embedding_vec_size,
                                   which_embedding="Plugin",
                                   embedding_type="distributed",
                                   dropout_rate=[0.5] * 10,
                                   deep_layers=[1024] * 10,
                                   initializer='uniform',
                                   gpus=args.gpus,
                                   batch_size=args.batch_size,
                                   batch_size_eval=args.batch_size_eval,
                                   slot_num=args.slot_num)

    # ----------- define train step ------------------------------------------------- #
    @tf.function
    def _train_step(dense_batch, sparse_batch, label_batch, model, loss_fn, optimizer):
        with tf.GradientTape() as tape:
            label_batch = tf.cast(label_batch, dtype=tf.float32)
            logits = model(dense_batch, sparse_batch, training=True)
            loss = loss_fn(label_batch, logits)
            loss /= dense_batch.shape[0]
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        return loss

    # ------------------------ training loop ---------------------------------------- #
    logging.info("Begin to train..")
    begin_time = time.time()
    display_begin = begin_time

    for step, datas in enumerate(dataset):
        label, dense, sparse = datas[0], datas[1], datas[2:-1]
        train_loss = _train_step(dense, sparse, label, model, loss_fn, optimizer)
        loss_v = train_loss.numpy()

        if (step % args.display == 0 and step != 0):
            display_end = time.time()
            logging.info("step: %d, loss: %.7f, elapsed time: %.5f seconds."
                         % (step, loss_v, (display_end - display_begin)))
            display_begin = display_end

    end_time = time.time()
    logging.info("Train end. Elapsed time: %.3f seconds." % (end_time - begin_time))
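
# Hedged entry-point sketch for main(args) above. The argument names mirror the
# attributes main() reads from `args`; the help strings and defaults are
# placeholder assumptions and should be adjusted to the actual dataset/model.
if __name__ == "__main__":
    import argparse
    import logging

    parser = argparse.ArgumentParser(description="train DeepFM with the HugeCTR plugin embedding")
    parser.add_argument("--data_path", type=str, required=True,
                        help="directory containing train_0.tfrecord")
    parser.add_argument("--batch_size", type=int, required=True)
    parser.add_argument("--batch_size_eval", type=int, required=True)
    parser.add_argument("--n_epochs", type=int, default=1)
    parser.add_argument("--gpus", type=int, nargs="+", default=[0])
    parser.add_argument("--vocabulary_size", type=int, required=True)
    parser.add_argument("--embedding_vec_size", type=int, required=True)
    parser.add_argument("--slot_num", type=int, default=26)
    parser.add_argument("--display", type=int, default=100,
                        help="log the loss every `display` steps")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    main(args)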
import txt2tfrecord as utils
import tensorflow as tf
import hugectr_tf_ops
from model import OriginalEmbedding
from read_data import create_dataset, CreateDataset
import argparse
import logging
import time
import numpy as np

tf.debugging.set_log_device_placement(False)
devices = tf.config.list_physical_devices("GPU")
for dev in devices:
    tf.config.experimental.set_memory_growth(dev, True)

cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
feature_desc = dict()
for col in cols:
    if col == 'label' or col.startswith("I"):
        feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
    else:
        feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]


def Convert_to_csr_test(batch_size, gpu_count, embedding_type, iterations=10):
    def _plugin_CPU_op_VS_tf_ops():
        """
        Compare the result of converting to CSR between plugin CPU ops and tf ops.
        """
        dataset_names = ['./performance_profile/train.tfrecord']
def save_dataset_to_python_obj(batch_size, num_batch, save_name, gpu_count,
                               convert_to_csr=True, embedding_type='distributed',
                               get_row_indices=False):
    """
    Save num_batch * batch_size samples to a python object so that they can be
    loaded into CPU memory rather than read from tfrecord.
    """
    import txt2tfrecord as utils
    from read_data import CreateDataset

    cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
    feature_desc = dict()
    for col in cols:
        if col == 'label' or col.startswith("I"):
            feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
        else:
            feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]

    dataset_names = ["train.tfrecord"]
    dataset = CreateDataset(dataset_names=dataset_names,
                            feature_desc=feature_desc,
                            batch_size=batch_size,
                            n_epochs=1,
                            slot_num=26,
                            max_nnz=1,
                            convert_to_csr=tf.constant(convert_to_csr, dtype=tf.bool),
                            gpu_count=gpu_count,
                            embedding_type=embedding_type,
                            get_row_indices=get_row_indices)()

    # read batches into a python dict
    save_dict = dict()
    for step, datas in enumerate(dataset):
        if (step >= num_batch):
            break

        py_batch_datas = dict()
        label, dense, others = datas[0], datas[1], datas[2:]
        py_batch_datas["label"] = label.numpy()
        py_batch_datas["dense"] = dense.numpy()
        if (convert_to_csr):
            sparse = others[0:3]
            py_batch_datas["row_offsets"] = sparse[0].numpy()
            py_batch_datas["values"] = sparse[1].numpy()
            py_batch_datas["nnz_array"] = sparse[2].numpy()
        else:
            if get_row_indices:
                sparse = others[0:2]
                py_batch_datas['row_indices'] = sparse[0].numpy()
                py_batch_datas['values'] = sparse[1].numpy()
            else:
                sparse = others[-1]
                py_batch_datas["indices"] = sparse.indices.numpy()
                py_batch_datas["values"] = sparse.values.numpy()
                py_batch_datas["dense_shape"] = sparse.dense_shape.numpy()

        save_dict["step_" + str(step)] = py_batch_datas

    if (convert_to_csr or get_row_indices):
        file_name = save_name + "_" + embedding_type + "_" + str(gpu_count)
    else:
        file_name = save_name + "_" + str(gpu_count)

    # save dict into file
    with open(file_name, 'wb') as file:
        pickle.dump(save_dict, file)
    print("Save done %s." % file_name)
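
# Hedged sketch of reading the pickled batches back for CPU-side experiments.
# The dict keys ("label", "dense", "row_offsets", ...) follow the layout written
# above; the helper name and the example file name are illustrative assumptions.
import pickle

def load_dataset_from_python_obj(file_name):
    """Yield the saved batches in step order from a file written by save_dataset_to_python_obj."""
    with open(file_name, "rb") as file:
        save_dict = pickle.load(file)
    for step_key in sorted(save_dict.keys(), key=lambda k: int(k.split("_")[1])):
        yield save_dict[step_key]

# usage sketch (file name depends on save_name / embedding_type / gpu_count):
# for batch in load_dataset_from_python_obj("cpu_cache_distributed_4"):
#     label, dense = batch["label"], batch["dense"]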
def main(args):
    cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
    feature_desc = dict()
    for col in cols:
        if col == 'label' or col.startswith("I"):
            feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
        else:
            feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]

    # dataset_names = ["train_" + str(i) + ".tfrecord" for i in range(10)]
    dataset_names = ["train.tfrecord"]
    dataset = create_dataset(dataset_names=dataset_names,
                             feature_desc=feature_desc,
                             batch_size=args.batch_size,
                             n_epochs=args.n_epochs,
                             distribute_keys=tf.constant(args.distribute_keys != 0, dtype=tf.bool),
                             gpu_count=len(args.gpus),
                             embedding_type=tf.constant(args.embedding_type, dtype=tf.string))

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

    if args.which_embedding == "OriginalEmbedding":
        model = DeepFM_OriginalEmbedding(vocabulary_size=args.vocabulary_size,
                                         embedding_vec_size=args.embedding_vec_size,
                                         which_embedding=args.which_embedding,
                                         embedding_type=args.embedding_type,
                                         dropout_rate=[0.5] * 10,
                                         deep_layers=[1024] * 10,
                                         initializer='uniform',
                                         gpus=args.gpus,
                                         batch_size=args.batch_size,
                                         batch_size_eval=args.batch_size_eval,
                                         slot_num=args.slot_num)
    elif args.which_embedding == "PluginEmbedding":
        model = DeepFM_PluginEmbedding(vocabulary_size=args.vocabulary_size,
                                       embedding_vec_size=args.embedding_vec_size,
                                       which_embedding=args.which_embedding,
                                       embedding_type=args.embedding_type,
                                       dropout_rate=[0.5] * 10,
                                       deep_layers=[1024] * 10,
                                       initializer='uniform',
                                       gpus=args.gpus,
                                       batch_size=args.batch_size,
                                       batch_size_eval=args.batch_size_eval,
                                       slot_num=args.slot_num)

    @tf.function
    def _train_step(dense_batch, sparse_batch, y_batch, model, loss_fn, optimizer):
        with tf.GradientTape(persistent=False) as tape:
            y_batch = tf.cast(y_batch, dtype=tf.float32)
            logits = model(dense_batch, sparse_batch, training=True)
            loss = loss_fn(y_batch, logits)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        return loss

    logging.info("begin to train.")
    begin_time = time.time()
    train_loss_list = []
    display_begin = begin_time

    # with tf.profiler.experimental.Profile("./origin_1030"):
    for step, items in enumerate(dataset):
        label, dense, others = items[0], items[1], items[2:]
        if (tf.convert_to_tensor(args.distribute_keys != 0, dtype=tf.bool)):
            sparse = others[0:3]
        else:
            sparse = others[-1]

        train_loss = _train_step(dense, sparse, label, model, loss_fn, optimizer)
        loss_value = train_loss.numpy()
        train_loss_list.append(loss_value)

        if (step % args.display == 0 and step != 0):
            display_end = time.time()
            logging.info("step: %d, loss: %.5f, elapsed time: %.5f seconds."
                         % (step, loss_value, (display_end - display_begin)))
            display_begin = display_end

        if step >= 50:
            break

    end_time = time.time()
    logging.info("Train End. Elapsed Time: %.3f seconds." % (end_time - begin_time))
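
# Design note (hedged): the OriginalEmbedding / PluginEmbedding branches in
# main() pass identical keyword arguments, so the constructor choice could be
# table-driven. A sketch of a helper that would replace that if/elif block; it
# only uses the classes this script already references and raises KeyError for
# an unknown value instead of silently leaving `model` undefined.
def _build_model(args):
    model_classes = {"OriginalEmbedding": DeepFM_OriginalEmbedding,
                     "PluginEmbedding": DeepFM_PluginEmbedding}
    return model_classes[args.which_embedding](
        vocabulary_size=args.vocabulary_size,
        embedding_vec_size=args.embedding_vec_size,
        which_embedding=args.which_embedding,
        embedding_type=args.embedding_type,
        dropout_rate=[0.5] * 10,
        deep_layers=[1024] * 10,
        initializer='uniform',
        gpus=args.gpus,
        batch_size=args.batch_size,
        batch_size_eval=args.batch_size_eval,
        slot_num=args.slot_num)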
def save_tfrecord_to_python_file(embedding_type, gpu_count, num_batch=50, fprop_version='v1'):
    cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
    feature_desc = dict()
    for col in cols:
        if col == 'label' or col.startswith("I"):
            feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
        else:
            feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]

    if fprop_version == "v1":
        dataset = CreateDataset(dataset_names=["./train.tfrecord"],
                                feature_desc=feature_desc,
                                batch_size=65536,
                                n_epochs=1,
                                slot_num=26,
                                max_nnz=1,
                                convert_to_csr=True,
                                gpu_count=gpu_count,
                                embedding_type=embedding_type,
                                get_row_indices=False)

        save_dict = dict()
        for step, datas in enumerate(dataset()):
            if (step >= num_batch):
                break

            label, dense, others = datas[0], datas[1], datas[2:]
            py_batch_datas = dict()
            py_batch_datas["label"] = label.numpy()
            py_batch_datas['dense'] = dense.numpy()
            sparse = others[0:3]
            py_batch_datas['row_offsets'] = sparse[0].numpy()
            py_batch_datas['value_tensors'] = sparse[1].numpy()
            py_batch_datas['nnz_array'] = sparse[2].numpy()
            save_dict["step_" + str(step)] = py_batch_datas

        save_name = "plugin_v2_" + embedding_type + "_" + str(gpu_count) + "_" + fprop_version
        with open(save_name, "wb") as file:
            pickle.dump(save_dict, file)

    elif fprop_version == "v2":
        dataset = CreateDataset(dataset_names=["./train.tfrecord"],
                                feature_desc=feature_desc,
                                batch_size=65536,
                                n_epochs=1,
                                slot_num=26,
                                max_nnz=1,
                                convert_to_csr=False,
                                gpu_count=gpu_count,
                                embedding_type=embedding_type,
                                get_row_indices=True)

        save_dict = dict()
        for step, datas in enumerate(dataset()):
            if (step >= num_batch):
                break

            label, dense, others = datas[0], datas[1], datas[2:]
            py_batch_datas = dict()
            py_batch_datas["label"] = label.numpy()
            py_batch_datas['dense'] = dense.numpy()
            sparse = others[0:2]
            py_batch_datas['row_indices'] = sparse[0].numpy()
            py_batch_datas['values'] = sparse[1].numpy()
            save_dict["step_" + str(step)] = py_batch_datas

        save_name = "plugin_v2_" + embedding_type + "_" + str(gpu_count) + "_" + fprop_version
        with open(save_name, "wb") as file:
            pickle.dump(save_dict, file)

    else:
        raise ValueError("fprop_version can only be one of ['v1', 'v2'], but got %s." % fprop_version)

    print("[INFO]: Save %s done." % save_name)
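
# Hypothetical usage sketch: dump the first num_batch batches for both fprop
# versions so they can be replayed without the tfrecord pipeline. The concrete
# argument values (embedding_type, gpu_count) are assumptions for illustration.
if __name__ == "__main__":
    for fprop_version in ("v1", "v2"):
        save_tfrecord_to_python_file(embedding_type="distributed",
                                     gpu_count=4,
                                     num_batch=50,
                                     fprop_version=fprop_version)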