Example #1
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    num_threads: int):
    import torch
    import transformers
    import contexttimer
    import benchmark_helper
    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")
    model.eval()
    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)
    benchmark_helper.run_model(lambda: model(input_ids), False, n, batch_size,
                               seq_len, "torch", num_threads)
Example #2
def generate_onnx_model(model_name: str, filename: str, seq_len: int,
                        batch_size: int, backend: str):
    import transformers
    import torch
    import os

    test_device = torch.device('cuda:0') if backend == "GPU" else torch.device(
        'cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    with open(filename, 'wb') as outf:
        torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        outf.flush()
    return cfg.vocab_size
Example #3
def generate_onnx_model(model_name: str,
                        use_gpu: bool,
                        filename: str,
                        seq_len: int,
                        batch_size: int,
                        backend: str,
                        use_dynamic_axes: bool = False):
    import transformers
    import torch
    import os

    test_device = torch.device(
        'cuda:0') if backend == "GPU" and use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        # use a real model to check the correctness
        # NOTE: `checkonnxrest` is assumed to be a module-level flag of the benchmark script
        if checkonnxrest:
            model = transformers.BertModel.from_pretrained("bert-base-uncased")
        else:
            cfg = transformers.BertConfig()
            model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    with open(filename, 'wb') as outf:
        if not use_dynamic_axes:
            torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        else:
            torch.onnx.export(model=model,
                              args=(input_ids, ),
                              f=outf,
                              input_names=['input'],
                              output_names=['output'],
                              dynamic_axes={
                                  'input': [0, 1],
                                  'output': [0, 1]
                              })
        # If you do not need onnxruntime to support variable batch size and
        # sequence length, you can leave `dynamic_axes` unset.
        # For some models you may have to pass `opset_version=12`.
        outf.flush()
    return cfg.vocab_size, cfg
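
The exported file can then be loaded with onnxruntime. A minimal sketch, assuming the model was exported with `use_dynamic_axes=True` to the hypothetical path `albert.onnx` (so the graph input is named `'input'`):

import numpy
import onnxruntime

sess = onnxruntime.InferenceSession("albert.onnx",
                                    providers=["CPUExecutionProvider"])
# dtype must match the torch.long input used during export.
input_ids = numpy.random.randint(0, 30000, size=(1, 128), dtype=numpy.int64)
outputs = sess.run(None, {"input": input_ids})
print(outputs[0].shape)  # (batch_size, seq_len, hidden_size)
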
Example #4
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool,
                                 enable_mem_opt: bool):
    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    cfg = None
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model, backend="turbo")
    elif model_name == "albert":
        cfg = transformers.AlbertConfig(hidden_size=768,
                                        num_attention_heads=12,
                                        intermediate_size=3072)
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.DistilBertModel.from_torch(model)
    else:
        raise (f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("model-aware")
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg, enable_mem_opt, model_name)
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("naive")
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads,
                                   enable_mem_opt, model_name)
Example #5
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool):
    import torch
    import transformers
    import contexttimer
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    if use_gpu:
        print("using GPU")
    else:
        print("using CPU")
    cfg = None
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        raise (f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)

        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads)
Example #6
def benchmark_torch_jit(model_name: str, seq_len: int, batch_size: int, n: int,
                        enable_random: bool, max_seq_len: int,
                        min_seq_len: int, num_threads: int, use_gpu: bool,
                        enable_mem_opt: bool):
    import json
    import torch
    import torch.jit
    import transformers
    import contexttimer
    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")
    model.eval()
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)

    model = torch.jit.trace(model, (input_ids, ))

    with torch.jit.optimized_execution(True):
        model(input_ids)
        with contexttimer.Timer() as t:
            for _ in range(n):
                model(input_ids)

    print(
        json.dumps({
            "QPS": n / t.elapsed,
            "elapsed": t.elapsed,
            "n": n,
            "batch_size": batch_size,
            "seq_len": seq_len,
            "framework": "torch_jit",
            "n_threads": num_threads,
            "model_name": model_name
        }))
Example #7
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int):
    import torch
    import transformers
    import contexttimer
    import turbo_transformers
    import benchmark_helper

    if not torch.cuda.is_available():
        print("cuda is not available for torch")
        return
    test_device = torch.device('cuda:0')

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        raise (f"benchmark does not support {model_name}")

    # cfg already references the configuration created above; the turbo wrapper may not expose .config
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    benchmark_helper.run_model(lambda: model(input_ids), True, n, batch_size,
                               seq_len, "turbo")
Example #8
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    enable_random: bool, max_seq_len: int, min_seq_len: int,
                    num_threads: int, use_gpu: bool, enable_mem_opt: bool):
    import torch
    import transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)
    torch.set_num_threads(num_threads)

    cfg = None
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")
    model.eval()
    model.to(test_device)

    # cfg = model.config  # type: transformers.BertConfig
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "torch", num_threads,
                                            cfg, enable_mem_opt, model_name)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "torch", num_threads,
                                   enable_mem_opt, model_name)
Example #9
def test(loadtype: LoadType, use_cuda: bool):
    cfg = transformers.AlbertConfig(hidden_size=768,
                                    num_attention_heads=12,
                                    intermediate_size=3072)
    model = transformers.AlbertModel(cfg)
    model.eval()
    torch.set_grad_enabled(False)

    test_device = torch.device('cuda:0') if use_cuda else \
        torch.device('cpu:0')

    cfg = model.config
    # use 4 threads for computing
    turbo_transformers.set_num_threads(4)

    input_ids = torch.tensor(
        ([12166, 10699, 16752, 4454], [5342, 16471, 817, 16022]),
        dtype=torch.long,
        device=test_device)
    model.to(test_device)
    start_time = time.time()
    for _ in range(10):
        torch_res = model(input_ids)
    end_time = time.time()
    print("\ntorch time consum: {}".format(end_time - start_time))

    # There are three ways to load a pretrained model; only the PyTorch path
    # is exercised here.
    if loadtype is LoadType.PYTORCH:
        # 1. from a PyTorch model that has already loaded pretrained weights
        tt_model = turbo_transformers.AlbertModel.from_torch(model)
    else:
        raise ("LoadType is not supported")

    start_time = time.time()
    for _ in range(10):
        res = tt_model(input_ids)  # sequence_output, pooled_output
    end_time = time.time()

    print("\nturbo time consum: {}".format(end_time - start_time))
    assert (numpy.max(
        numpy.abs(res[0].cpu().numpy() - torch_res[0].cpu().numpy())) < 0.1)
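
`LoadType` is defined elsewhere in the TurboTransformers examples. A minimal sketch of a definition and invocation that satisfies this test (members other than `PYTORCH` are assumptions):

from enum import Enum


class LoadType(Enum):
    PYTORCH = "PYTORCH"
    PRETRAINED = "PRETRAINED"
    NPZ = "NPZ"


if __name__ == "__main__":
    test(LoadType.PYTORCH, use_cuda=False)
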
Example #10
def train(args):
    logging.basicConfig(level=logging.INFO)
    tokenizer = transformers.AlbertTokenizer.from_pretrained(
        'albert-base-v2', cache_dir=cache_dir)
    albert_for_math_config = transformers.AlbertConfig(
        hidden_size=768,
        num_attention_heads=12,
        intermediate_size=3072,
    )

    if args['--load']:
        model = transformers.AlbertForMaskedLM.from_pretrained(
            args['--load-from'])
        training_args = transformers.TrainingArguments(
            output_dir=args['--save-to'],
            overwrite_output_dir=True,
            num_train_epochs=int(args['--max-epoch']),
            per_gpu_train_batch_size=int(args['--batch-size']),
            per_gpu_eval_batch_size=int(args['--batch-size']),
            logging_steps=int(args['--log-every']),
            save_steps=int(args['--save-every']),
            save_total_limit=10,
            learning_rate=float(args['--lr']),
            seed=int(args['--seed']),
        )

    else:
        model = transformers.AlbertForMaskedLM(albert_for_math_config)
        training_args = transformers.TrainingArguments(
            output_dir=args['--save-to'],
            num_train_epochs=int(args['--max-epoch']),
            per_gpu_train_batch_size=int(args['--batch-size']),
            per_gpu_eval_batch_size=int(args['--batch-size']),
            logging_steps=int(args['--log-every']),
            save_steps=int(args['--save-every']),
            save_total_limit=10,
            learning_rate=float(args['--lr']),
            seed=int(args['--seed']),
        )

    # load datasets
    print('Loading Data...')
    train_data = torch.load(
        './data/train_data_train-easy_algebra__linear_1d.pt')
    dev_data = torch.load('./data/dev_data_train-easy_algebra__linear_1d.pt')
    print('Finished loading data')
    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    model.to(device)
    trainer = transformers.Trainer(
        model=model,
        args=training_args,
        data_collator=AnswerMaskDataCollator(tokenizer),
        train_dataset=train_data,
        eval_dataset=dev_data,
        prediction_loss_only=True,
    )

    if args['--load']:
        trainer.train(model_path=args['--load-from'])
    else:
        trainer.train()
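
`args` is a docopt-style dictionary. An illustrative set of keys inferred from the function body (paths and values are placeholders):

args = {
    '--load': False,            # set True to resume from '--load-from'
    '--load-from': None,
    '--save-to': './albert_math_checkpoints',
    '--max-epoch': '3',
    '--batch-size': '32',
    '--log-every': '100',
    '--save-every': '1000',
    '--lr': '1e-4',
    '--seed': '42',
    '--cuda': True,
}
train(args)
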
Example #11
def train_without_trainer(args):
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    batch_size = int(args['--batch-size'])
    logging_steps = int(args['--log-every'])

    tokenizer = transformers.AlbertTokenizer.from_pretrained(
        'albert-base-v2', cache_dir=cache_dir)
    albert_for_math_config = transformers.AlbertConfig(
        hidden_size=768,
        num_attention_heads=12,
        intermediate_size=3072,
    )
    print('Loading Data...')
    train_data = torch.load(
        './data/train_data_train-easy_algebra__linear_1d.pt')
    dev_data = torch.load('./data/dev_data_train-easy_algebra__linear_1d.pt')
    print('Finished loading data')
    data_collator = AnswerMaskDataCollator(tokenizer)
    train_dataloader = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=torch.utils.data.sampler.RandomSampler(train_data),
        collate_fn=data_collator.collate_batch)

    if args['--load']:
        model = transformers.AlbertForMaskedLM.from_pretrained(
            args['--load-from'])
        optimizer = get_optimizers(model, float(args['--lr']))
        optimizer.load_state_dict(
            torch.load(os.path.join(args['--load-from'], "optimizer.pt"),
                       map_location=device))
        global_step = int(args['--load-from'].split("-")[-1].split("/")[0])
        epochs_trained = global_step // (len(train_dataloader))
        steps_trained_in_current_epoch = global_step % len(train_dataloader)
        epoch = epochs_trained
        logger.info(
            "  Continuing training from checkpoint, will skip to saved global_step"
        )
        logger.info("  Continuing training from epoch %d", epochs_trained)
        logger.info("  Continuing training from global step %d", global_step)
        logger.info("  Will skip the first %d steps in the first epoch",
                    steps_trained_in_current_epoch)
    else:
        model = transformers.AlbertForMaskedLM(albert_for_math_config)
        optimizer = get_optimizers(model, float(args['--lr']))
        global_step = 0
        epochs_trained = 0
        steps_trained_in_current_epoch = 0
        epoch = 0
    model.to(device)
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)
    max_epoch = int(args['--max-epoch'])
    t_total = len(train_dataloader) * max_epoch
    tr_loss = 0.0
    logging_loss = 0.0
    min_eval_loss = 1e20  # large sentinel; any real evaluation loss will be lower
    valid_niter = int(args['--valid-niter'])
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_data))
    logger.info("  Num Epochs = %d", max_epoch)
    logger.info("  train batch size = %d", batch_size)
    logger.info("  Total optimization steps = %d", t_total)
    num_eval_samples = 4096
    checkpoint_prefix = 'checkpoint'
    while (epoch < max_epoch):

        epoch_iterator = tqdm(train_dataloader, desc="Iteration")
        for step, inputs in enumerate(epoch_iterator):
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            tr_loss += train_step(model, inputs, device)
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           float(args['--clip-grad']))
            optimizer.step()
            model.zero_grad()
            global_step += 1
            if global_step % logging_steps == 0:
                logs: Dict[str, float] = {}
                logs["loss"] = (tr_loss - logging_loss) / logging_steps
                logs["lr"] = (optimizer.defaults['lr']
                              )  # possible RuntimeError
                logs["epoch"] = epoch
                logs["step"] = global_step
                logging_loss = tr_loss
                log(logs)
            if global_step % valid_niter == 0:
                eval_loss = 0.0
                description = "Evaluation"
                sampler = torch.utils.data.sampler.SequentialSampler(
                    dev_data[:num_eval_samples])
                eval_dataloader = torch.utils.data.DataLoader(
                    dev_data[:num_eval_samples],
                    sampler=sampler,
                    batch_size=batch_size,
                    collate_fn=data_collator.collate_batch,
                )
                logger.info("***** Running %s *****", description)
                logger.info("   Num Examples = %d", num_eval_samples)
                logger.info("   Batch size = %d", batch_size)
                for inputs in tqdm(eval_dataloader, desc=description):
                    for k, v in inputs.items():
                        inputs[k] = v.to(device)
                    model.eval()
                    with torch.no_grad():
                        outputs = model(**inputs)
                        loss = outputs[0]
                        eval_loss += loss.item()
                print("\nEvaluation loss = %f" %
                      (eval_loss / num_eval_samples))
                if eval_loss / num_eval_samples * batch_size < min_eval_loss:
                    min_eval_loss = eval_loss / num_eval_samples * batch_size
                    # save model and optimizer

                    output_dir = os.path.join(
                        args['--save-to'] + '/validations/',
                        f"{checkpoint_prefix}-{global_step}")
                    os.makedirs(output_dir, exist_ok=True)
                    model.save_pretrained(output_dir)
                    output_dir = os.path.join(args['--save-to'] +
                                              '/validations/')
                    rotate_checkpoints(output_dir)
                    output_dir = os.path.join(
                        args['--save-to'] + '/validations/',
                        f"{checkpoint_prefix}-{global_step}")
                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
            if global_step % int(args['--save-every']) == 0:
                output_dir = os.path.join(
                    args['--save-to'], f"{checkpoint_prefix}-{global_step}")
                os.makedirs(output_dir, exist_ok=True)
                model.save_pretrained(output_dir)
                output_dir = os.path.join(args['--save-to'])
                rotate_checkpoints(output_dir)
                output_dir = os.path.join(
                    args['--save-to'], f"{checkpoint_prefix}-{global_step}")
                torch.save(optimizer.state_dict(),
                           os.path.join(output_dir, "optimizer.pt"))
        epoch_iterator.close()
        epoch += 1
    logger.info(
        "\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n"
    )
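
`get_optimizers` and `train_step` are helpers defined elsewhere in this script. A plausible sketch consistent with how they are called above (the choice of AdamW and the exact loss handling are assumptions):

import torch
import transformers


def get_optimizers(model, lr: float):
    # A single optimizer object, matching the optimizer.defaults['lr'] lookup above.
    return transformers.AdamW(model.parameters(), lr=lr)


def train_step(model, inputs, device):
    # Move the collated batch to the device, run a forward/backward pass,
    # and return the scalar loss for logging; the caller clips gradients
    # and steps the optimizer.
    model.train()
    inputs = {k: v.to(device) for k, v in inputs.items()}
    loss = model(**inputs)[0]
    loss.backward()
    return loss.item()
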
Example #12

t_dataset = t_dataset.map(d_map, remove_columns=["text"], batched=True)

# Create a data collator for masked language modeling
t_DataCollator = DataCollatorForLanguageModeling(t_tokenizer,
                                                 mlm=True,
                                                 mlm_probability=0.3)

# Create the ALBERT model configuration
albert_config = transformers.AlbertConfig(
    vocab_size=len(t_tokenizer),
    embedding_size=128,
    num_hidden_layers=2,
    num_attention_heads=4,
    hidden_size=256,
    intermediate_size=512,
    pad_token_id=t_tokenizer.pad_token_id,
    bos_token_id=t_tokenizer.bos_token_id,
    eos_token_id=t_tokenizer.eos_token_id,
    sep_token_id=t_tokenizer.sep_token_id)

# Create the ALBERT masked language model
albert_model = AutoModelForMaskedLM.from_config(albert_config)
# albert_model = AlbertForMaskedLM.from_pretrained("/home/hedan/tools/Github/NLP_Based_Transformer/model/checkpoint-5000")
# albert_model.resize_token_embeddings(len(t_tokenizer))

# Configure the training arguments
train_args = transformers.TrainingArguments(output_dir="./model",
                                            do_train=True,
                                            logging_steps=50,