# Multi-node training with the low-level Session API: four nodes with two GPUs
# each, FP32. The original snippet used `rank` without defining it; it is taken
# here from mpi4py so that loss logging is restricted to the root process.
from mpi4py import MPI
from hugectr import Session, solver_parser_helper, get_learning_rate_scheduler

rank = MPI.COMM_WORLD.Get_rank()

def session_impl_test(json_file):
    solver_config = solver_parser_helper(seed=0,
                                         batchsize=16384,
                                         batchsize_eval=16384,
                                         model_file="",
                                         embedding_files=[],
                                         vvgpu=[[0, 1], [2, 3], [4, 5], [6, 7]],
                                         use_mixed_precision=False,
                                         scaler=1.0,
                                         i64_input_key=False,
                                         use_algorithm_search=True,
                                         use_cuda_graph=True,
                                         repeat_dataset=True)
    sess = Session(solver_config, json_file)
    sess.start_data_reading()
    lr_sch = get_learning_rate_scheduler(json_file)
    for i in range(10000):
        lr = lr_sch.get_next()
        sess.set_learning_rate(lr)
        sess.train()
        if i % 100 == 0:
            loss = sess.get_current_loss()
            if rank == 0:
                print("[HUGECTR][INFO] iter: {}; loss: {}".format(i, loss))
        if i % 1000 == 0 and i != 0:
            metrics = sess.evaluation()
            print("[HUGECTR][INFO] rank: {}, iter: {}, {}".format(rank, i, metrics))
    return
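# A hypothetical launcher for the snippet above; the script and config file
# names are assumptions, not from the original. With vvgpu spanning four nodes
# of two GPUs each, the script would be started once per node, e.g.:
#   mpirun -np 4 python session_test.py
if __name__ == "__main__":
    session_impl_test("dcn_multi_node.json")  # hypothetical network config JSON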
# Single-node training on eight GPUs with mixed precision (scaler=1024).
# Evaluation is driven manually: the overflow check runs, weights are copied to
# the evaluation network, then eval() is called once per evaluation batch.
from hugectr import Session, solver_parser_helper, get_learning_rate_scheduler

def session_impl_test(json_file):
    solver_config = solver_parser_helper(seed=0,
                                         batchsize=16384,
                                         batchsize_eval=16384,
                                         model_file="",
                                         embedding_files=[],
                                         vvgpu=[[0, 1, 2, 3, 4, 5, 6, 7]],
                                         use_mixed_precision=True,
                                         scaler=1024,
                                         i64_input_key=False,
                                         use_algorithm_search=True,
                                         use_cuda_graph=True,
                                         repeat_dataset=True)
    lr_sch = get_learning_rate_scheduler(json_file)
    sess = Session(solver_config, json_file)
    sess.start_data_reading()
    for i in range(10000):
        lr = lr_sch.get_next()
        sess.set_learning_rate(lr)
        sess.train()
        if i % 100 == 0:
            loss = sess.get_current_loss()
            print("[HUGECTR][INFO] iter: {}; loss: {}".format(i, loss))
        if i % 1000 == 0 and i != 0:
            sess.check_overflow()
            sess.copy_weights_for_evaluation()
            data_reader_eval = sess.get_data_reader_eval()
            for _ in range(solver_config.max_eval_batches):
                sess.eval()
            metrics = sess.get_eval_metrics()
            print("[HUGECTR][INFO] iter: {}, {}".format(i, metrics))
    return
# Epoch-mode training (repeat_dataset=False) on a Raw dataset. The data readers
# are rewound with set_source() at the start of each round, and train()/eval()
# return False once the current source is exhausted.
from hugectr import Session, solver_parser_helper, get_learning_rate_scheduler

def set_source_raw_test(json_file):
    train_data = "./train_data.bin"
    test_data = "./test_data.bin"
    solver_config = solver_parser_helper(seed=0,
                                         batchsize=16384,
                                         batchsize_eval=16384,
                                         max_eval_batches=5441,
                                         model_file="",
                                         embedding_files=[],
                                         vvgpu=[[0, 1, 2, 3, 4, 5, 6, 7]],
                                         use_mixed_precision=True,
                                         scaler=1024,
                                         i64_input_key=False,
                                         use_algorithm_search=True,
                                         use_cuda_graph=True,
                                         repeat_dataset=False)
    lr_sch = get_learning_rate_scheduler(json_file)
    sess = Session(solver_config, json_file)
    data_reader_train = sess.get_data_reader_train()
    data_reader_eval = sess.get_data_reader_eval()
    data_reader_eval.set_source(test_data)
    iteration = 1
    for cnt in range(2):
        data_reader_train.set_source(train_data)
        print("[HUGECTR][INFO] round: {}".format(cnt), flush=True)
        while True:
            lr = lr_sch.get_next()
            sess.set_learning_rate(lr)
            good = sess.train()
            if not good:
                break
            if iteration % 4000 == 0:
                sess.check_overflow()
                sess.copy_weights_for_evaluation()
                data_reader_eval = sess.get_data_reader_eval()
                good_eval = True
                j = 0
                while good_eval:
                    if j >= solver_config.max_eval_batches:
                        break
                    good_eval = sess.eval()
                    j += 1
                # The evaluation source ran dry; rewind it for the next round
                if not good_eval:
                    data_reader_eval.set_source()
                metrics = sess.get_eval_metrics()
                print("[HUGECTR][INFO] iter: {}, metrics: {}".format(iteration, metrics),
                      flush=True)
            iteration += 1
        print("[HUGECTR][INFO] trained with data in {}".format(train_data), flush=True)
# Model oversubscription: the Session is constructed with the oversubscriber
# enabled (third argument) and a temporary directory for staging embeddings,
# and the oversubscriber loads the keyset that matches each training file list.
from hugectr import Session, solver_parser_helper, get_learning_rate_scheduler

def model_oversubscriber_test(json_file, temp_dir):
    dataset = [("file_list." + str(i) + ".txt",
                "file_list." + str(i) + ".keyset") for i in range(5)]
    solver_config = solver_parser_helper(seed=0,
                                         batchsize=16384,
                                         batchsize_eval=16384,
                                         model_file="",
                                         embedding_files=[],
                                         vvgpu=[[0]],
                                         use_mixed_precision=False,
                                         scaler=1.0,
                                         i64_input_key=False,
                                         use_algorithm_search=True,
                                         use_cuda_graph=True,
                                         repeat_dataset=False)
    lr_sch = get_learning_rate_scheduler(json_file)
    sess = Session(solver_config, json_file, True, temp_dir)
    data_reader_train = sess.get_data_reader_train()
    data_reader_eval = sess.get_data_reader_eval()
    data_reader_eval.set_source("file_list.5.txt")
    model_oversubscriber = sess.get_model_oversubscriber()
    iteration = 0
    for file_list, keyset_file in dataset:
        data_reader_train.set_source(file_list)
        model_oversubscriber.update(keyset_file)
        while True:
            lr = lr_sch.get_next()
            sess.set_learning_rate(lr)
            good = sess.train()
            if not good:
                break
            if iteration % 100 == 0:
                sess.check_overflow()
                sess.copy_weights_for_evaluation()
                data_reader_eval = sess.get_data_reader_eval()
                good_eval = True
                j = 0
                while good_eval:
                    if j >= solver_config.max_eval_batches:
                        break
                    good_eval = sess.eval()
                    j += 1
                # The evaluation source ran dry; rewind it for the next round
                if not good_eval:
                    data_reader_eval.set_source()
                metrics = sess.get_eval_metrics()
                print("[HUGECTR][INFO] iter: {}, metrics: {}".format(iteration, metrics))
            iteration += 1
        print("[HUGECTR][INFO] trained with data in {}".format(file_list))
    sess.download_params_to_files("./", iteration)
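# A minimal sketch of driving the oversubscriber test above; using tempfile for
# the staging directory and the "wdl.json" config name are assumptions.
import tempfile

if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as temp_dir:
        model_oversubscriber_test("wdl.json", temp_dir)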
import hugectr
from mpi4py import MPI

solver = hugectr.solver_parser_helper(num_epochs=0,
                                      max_iter=10000,
                                      max_eval_batches=100,
                                      batchsize_eval=2048,
                                      batchsize=2048,
                                      display=200,
                                      eval_interval=1000,
                                      i64_input_key=False,
                                      use_mixed_precision=False,
                                      repeat_dataset=True)
optimizer = hugectr.optimizer.CreateOptimizer(
    optimizer_type=hugectr.Optimizer_t.Adam,
    use_mixed_precision=False)
model = hugectr.Model(solver, optimizer)
model.add(
    hugectr.Input(data_reader_type=hugectr.DataReaderType_t.Norm,
                  source="./criteo_data/train/_file_list.txt",
                  eval_source="./criteo_data/val/_file_list.txt",
                  check_type=hugectr.Check_t.Non,
                  label_dim=1,
                  label_name="label",
                  dense_dim=13,
                  dense_name="dense",
                  data_reader_sparse_param_array=[
                      hugectr.DataReaderSparseParam(
                          hugectr.DataReaderSparse_t.Distributed, 30, 1, 26)
                  ],
                  sparse_names=["data1"]))
model.add(
    hugectr.SparseEmbedding(
        # ... (the remainder of this snippet is truncated in the original)
import hugectr

solver = hugectr.solver_parser_helper(num_epochs=0,
                                      max_iter=10000,
                                      max_eval_batches=100,
                                      batchsize_eval=2048,
                                      batchsize=2048,
                                      eval_interval=1000,
                                      use_mixed_precision=False)
optimizer = hugectr.optimizer.CreateOptimizer(
    optimizer_type=hugectr.Optimizer_t.Adam,
    use_mixed_precision=False)
model = hugectr.Model(solver, optimizer)
model.add(
    hugectr.Input(data_reader_type=hugectr.DataReaderType_t.Norm,
                  source="./file_list.txt",
                  eval_source="./file_list_test.txt",
                  check_type=hugectr.Check_t.Sum,
                  label_dim=1,
                  label_name="label",
                  dense_dim=13,
                  dense_name="dense",
                  data_reader_sparse_param_array=[
                      hugectr.DataReaderSparseParam(
                          hugectr.DataReaderSparse_t.Distributed, 30, 1, 26)
                  ],
                  sparse_names=["data1"]))
model.add(
    hugectr.SparseEmbedding(
        embedding_type=hugectr.Embedding_t.DistributedSlotSparseEmbeddingHash,
        max_vocabulary_size_per_gpu=1737709,
        embedding_vec_size=16,
        combiner=0,
        # ... (the remainder of this snippet is truncated in the original)
import logging

import hugectr

def train(input_train, input_val, max_iter, batchsize, snapshot, num_gpus,
          eval_interval, dense_model_file, sparse_model_files):
    logging.info(f"GPU Devices: {num_gpus}")
    # Configure and define the HugeCTR model
    solver = hugectr.solver_parser_helper(num_epochs=0,
                                          max_iter=max_iter,
                                          max_eval_batches=100,
                                          batchsize_eval=batchsize,
                                          batchsize=batchsize,
                                          model_file=dense_model_file,
                                          embedding_files=sparse_model_files,
                                          display=200,
                                          eval_interval=eval_interval,
                                          i64_input_key=True,
                                          use_mixed_precision=False,
                                          repeat_dataset=True,
                                          snapshot=snapshot,
                                          vvgpu=[num_gpus],
                                          use_cuda_graph=False)
    optimizer = hugectr.optimizer.CreateOptimizer(optimizer_type=hugectr.Optimizer_t.Adam,
                                                  use_mixed_precision=False)
    model = hugectr.Model(solver, optimizer)
    # The slot_size_array entries are the cardinalities of each categorical
    # feature after NVTabular preprocessing
    model.add(hugectr.Input(data_reader_type=hugectr.DataReaderType_t.Parquet,
                            source=input_train,
                            eval_source=input_val,
                            check_type=hugectr.Check_t.Non,
                            label_dim=1, label_name="label",
                            dense_dim=13, dense_name="dense",
                            slot_size_array=[18576837, 29428, 15128, 7296, 19902, 4, 6466,
                                             1311, 62, 11700067, 622921, 219557, 11, 2209,
                                             9780, 71, 4, 964, 15, 22022124, 4384510,
                                             15960286, 290588, 10830, 96, 35],
                            data_reader_sparse_param_array=[
                                hugectr.DataReaderSparseParam(
                                    hugectr.DataReaderSparse_t.Distributed, 30, 1, 26)],
                            sparse_names=["data1"]))
    # Sparse embedding layer
    model.add(hugectr.SparseEmbedding(
        embedding_type=hugectr.Embedding_t.DistributedSlotSparseEmbeddingHash,
        max_vocabulary_size_per_gpu=88656602,
        embedding_vec_size=16,
        combiner=0,
        sparse_embedding_name="sparse_embedding1",
        bottom_name="data1"))
    # Reshape the embedding output: 26 slots x 16 dims = 416
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Reshape,
                                 bottom_names=["sparse_embedding1"],
                                 top_names=["reshape1"], leading_dim=416))
    # Concatenate sparse embedding and dense input: 416 + 13 = 429
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Concat,
                                 bottom_names=["reshape1", "dense"],
                                 top_names=["concat1"]))
    # Feed the same 429-wide tensor to both the cross network and the deep network
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Slice,
                                 bottom_names=["concat1"],
                                 top_names=["slice11", "slice12"],
                                 ranges=[(0, 429), (0, 429)]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.MultiCross,
                                 bottom_names=["slice11"],
                                 top_names=["multicross1"], num_layers=6))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["slice12"],
                                 top_names=["fc1"], num_output=1024))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc1"], top_names=["relu1"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Dropout,
                                 bottom_names=["relu1"],
                                 top_names=["dropout1"], dropout_rate=0.5))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["dropout1"],
                                 top_names=["fc2"], num_output=1024))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc2"], top_names=["relu2"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Dropout,
                                 bottom_names=["relu2"],
                                 top_names=["dropout2"], dropout_rate=0.5))
    # Merge the deep and cross branches before the final projection
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Concat,
                                 bottom_names=["dropout2", "multicross1"],
                                 top_names=["concat2"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["concat2"],
                                 top_names=["fc3"], num_output=1))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.BinaryCrossEntropyLoss,
                                 bottom_names=["fc3", "label"],
                                 top_names=["loss"]))
    model.compile()
    model.summary()
    model.fit()
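# A hypothetical invocation of train() above; every argument value is an
# illustrative assumption. Empty model_file/embedding_files mean training
# starts from scratch rather than from a saved snapshot.
train(input_train="./criteo/train/_file_list.txt",
      input_val="./criteo/val/_file_list.txt",
      max_iter=20000,
      batchsize=2048,
      snapshot=10000,
      num_gpus=[0, 1, 2, 3],
      eval_interval=1000,
      dense_model_file="",
      sparse_model_files=[])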
import hugectr

# Note: DATA_DIR is assumed to be defined at module level and to point at the
# directory holding the Parquet data and its _file_list.txt files.
def _run_model(slot_sizes, total_cardinality):
    solver = hugectr.solver_parser_helper(
        vvgpu=[[0]],
        max_iter=2000,
        batchsize=2048,
        display=100,
        eval_interval=200,
        batchsize_eval=2048,
        max_eval_batches=160,
        i64_input_key=True,
        use_mixed_precision=False,
        repeat_dataset=True,
        snapshot=1900,
    )
    optimizer = hugectr.optimizer.CreateOptimizer(
        optimizer_type=hugectr.Optimizer_t.Adam, use_mixed_precision=False
    )
    model = hugectr.Model(solver, optimizer)
    model.add(
        hugectr.Input(
            data_reader_type=hugectr.DataReaderType_t.Parquet,
            source=DATA_DIR + "train/_file_list.txt",
            eval_source=DATA_DIR + "valid/_file_list.txt",
            check_type=hugectr.Check_t.Non,
            label_dim=1,
            label_name="label",
            dense_dim=0,
            dense_name="dense",
            slot_size_array=slot_sizes,
            data_reader_sparse_param_array=[
                hugectr.DataReaderSparseParam(
                    hugectr.DataReaderSparse_t.Distributed,
                    len(slot_sizes) + 1,
                    1,
                    len(slot_sizes),
                )
            ],
            sparse_names=["data1"],
        )
    )
    model.add(
        hugectr.SparseEmbedding(
            embedding_type=hugectr.Embedding_t.DistributedSlotSparseEmbeddingHash,
            max_vocabulary_size_per_gpu=total_cardinality,
            embedding_vec_size=16,
            combiner=0,
            sparse_embedding_name="sparse_embedding1",
            bottom_name="data1",
        )
    )
    # leading_dim=48 corresponds to 3 slots of 16-dim embedding vectors
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Reshape,
                                 bottom_names=["sparse_embedding1"],
                                 top_names=["reshape1"], leading_dim=48))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["reshape1"],
                                 top_names=["fc1"], num_output=128))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc1"], top_names=["relu1"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu1"],
                                 top_names=["fc2"], num_output=128))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc2"], top_names=["relu2"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu2"],
                                 top_names=["fc3"], num_output=1))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.BinaryCrossEntropyLoss,
                                 bottom_names=["fc3", "label"],
                                 top_names=["loss"]))
    model.compile()
    model.summary()
    model.fit()
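# A hypothetical call of _run_model() above; the slot sizes are illustrative.
# The shared embedding table must hold every category across all slots, so
# total_cardinality is the sum of the per-slot cardinalities.
slot_sizes = [10000, 4000, 500]
_run_model(slot_sizes, total_cardinality=sum(slot_sizes))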
import hugectr
# get_embedding_sizes is assumed to come from NVTabular, which produced the data
from nvtabular.ops import get_embedding_sizes

def train_hugectr(workflow, devices, out_path):
    # Derive embedding cardinalities from the NVTabular workflow and parse the
    # requested devices (e.g. "0,1" -> [[0], [1]]); note that the solver below
    # nevertheless pins training to GPU 0 via vvgpu=[[0]]
    embeddings = list(get_embedding_sizes(workflow).values())
    embeddings = [emb[0] for emb in embeddings]
    devices = [[int(d)] for d in list(devices)[0::2]]
    # Set solver and model
    solver = hugectr.solver_parser_helper(
        vvgpu=[[0]],
        max_iter=10000,
        max_eval_batches=100,
        batchsize_eval=2720,
        batchsize=2720,
        display=1000,
        eval_interval=3200,
        snapshot=3200,
        i64_input_key=True,
        use_mixed_precision=False,
        repeat_dataset=True,
    )
    optimizer = hugectr.optimizer.CreateOptimizer(
        optimizer_type=hugectr.Optimizer_t.SGD, use_mixed_precision=False)
    model = hugectr.Model(solver, optimizer)
    model.add(
        hugectr.Input(
            data_reader_type=hugectr.DataReaderType_t.Parquet,
            source=out_path + "/output/train/_file_list.txt",
            eval_source=out_path + "/output/valid/_file_list.txt",
            check_type=hugectr.Check_t.Non,
            label_dim=1, label_name="label",
            dense_dim=13, dense_name="dense",
            slot_size_array=embeddings,
            data_reader_sparse_param_array=[
                hugectr.DataReaderSparseParam(
                    hugectr.DataReaderSparse_t.Localized, 26, 1, 26)
            ],
            sparse_names=["data1"],
        ))
    model.add(
        hugectr.SparseEmbedding(
            embedding_type=hugectr.Embedding_t.LocalizedSlotSparseEmbeddingHash,
            max_vocabulary_size_per_gpu=15500000,
            embedding_vec_size=128,
            combiner=0,
            sparse_embedding_name="sparse_embedding1",
            bottom_name="data1",
        ))
    # Bottom MLP over the dense features
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["dense"],
                                 top_names=["fc1"], num_output=512))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc1"], top_names=["relu1"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu1"],
                                 top_names=["fc2"], num_output=256))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc2"], top_names=["relu2"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu2"],
                                 top_names=["fc3"], num_output=128))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc3"], top_names=["relu3"]))
    # Pairwise interactions between the bottom MLP output and the embeddings
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.Interaction,
                                 bottom_names=["relu3", "sparse_embedding1"],
                                 top_names=["interaction1"]))
    # Top MLP
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["interaction1"],
                                 top_names=["fc4"], num_output=1024))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc4"], top_names=["relu4"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu4"],
                                 top_names=["fc5"], num_output=1024))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc5"], top_names=["relu5"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu5"],
                                 top_names=["fc6"], num_output=512))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc6"], top_names=["relu6"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu6"],
                                 top_names=["fc7"], num_output=256))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU,
                                 bottom_names=["fc7"], top_names=["relu7"]))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.InnerProduct,
                                 bottom_names=["relu7"],
                                 top_names=["fc8"], num_output=1))
    model.add(hugectr.DenseLayer(layer_type=hugectr.Layer_t.BinaryCrossEntropyLoss,
                                 bottom_names=["fc8", "label"],
                                 top_names=["loss"]))
    # Run training
    model.compile()
    model.summary()
    model.fit()
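# A hypothetical call of train_hugectr() above; `workflow` is assumed to be an
# already-fit nvtabular.Workflow whose Parquet output was written under
# out_path + "/output". The devices string "0,1" parses to [[0], [1]].
train_hugectr(workflow, devices="0,1", out_path="./criteo")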