Пример #1
0
def evaluate_classifier(model, dataloaders, device):
    """Evaluate a sequence classifier on the test split.

    Args:
        model: Hugging-Face-style model called as
            ``model(input_ids, attention_mask=..., labels=...)`` whose output
            starts with ``(loss, logits, ...)``.
        dataloaders: dict containing a 'test_dataloader' whose batches are
            ``(input_ids, attention_masks, labels)`` tensors.
        device: torch device the batches are moved to.

    Returns:
        performance: dict with 'test_loss' (mean over batches) and
            'test_accuracy' (fraction of correctly classified samples).
        predictions: 1-D CPU tensor of predicted class ids.
        prediction_probs: 2-D CPU tensor of raw logits per sample.
        true_labels: 1-D CPU tensor of ground-truth labels.
    """
    model.eval()
    set_seed()

    test_loss = 0.0
    num_correct_preds = 0
    num_samples = 0
    pred_batches = []
    logit_batches = []
    label_batches = []

    with torch.no_grad():
        for step, batch in tqdm(enumerate(dataloaders['test_dataloader']),
                                total=len(dataloaders['test_dataloader'])):
            # Load and feed data to model
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)

            # BUG FIX: pass labels so the model returns (loss, logits, ...);
            # the original unpacked a loss it never requested.
            outputs = model(input_ids,
                            attention_mask=attention_masks,
                            labels=labels)
            loss, logits = outputs[:2]
            test_loss += loss.item()

            # BUG FIX: argmax over the logits (not the raw output tuple),
            # with no spurious tuple-unpacking of the result.
            batch_predictions = torch.argmax(logits, dim=1)

            num_correct_preds += torch.sum(
                batch_predictions == labels.data).item()
            num_samples += labels.size(0)

            # BUG FIX: accumulate into dedicated lists; the original called
            # predictions.extend(predictions), clobbering its own accumulator.
            pred_batches.append(batch_predictions)
            logit_batches.append(logits)
            label_batches.append(labels)
            torch.cuda.empty_cache()

    # Mean loss over test batches.
    test_loss = test_loss / len(dataloaders['test_dataloader'])
    # BUG FIX: accuracy = correct / number of test samples; the original
    # divided the correct count by len(val_dataloader).
    test_acc = num_correct_preds / num_samples
    predictions = torch.cat(pred_batches).cpu()
    # BUG FIX: .cpu() was referenced but never called in the original.
    prediction_probs = torch.cat(logit_batches).cpu()
    # BUG FIX: stack the accumulated labels, not the last batch's `labels`.
    true_labels = torch.cat(label_batches).cpu()

    performance = {
        'test_loss': test_loss,
        'test_accuracy': test_acc,
    }

    return performance, predictions, prediction_probs, true_labels
Пример #2
0
def evaluate_classifier(model, dataloader, device):
    """Compute test-set accuracy for a sequence classifier.

    Args:
        model: model called as ``model(input_ids, attention_masks)`` whose
            first output element is the logits tensor.
        dataloader: iterable of ``(input_ids, attention_masks, labels)``
            batches.
        device: torch device the batches are moved to.

    Returns:
        performance: dict with a single key 'test_accuracy'.
        predictions: 1-D tensor of predicted class ids (all batches).
        labels: 1-D tensor of ground-truth labels (all batches).
    """
    model.eval()
    set_seed()

    with torch.no_grad():
        # NOTE: the original also kept unused `test_acc`, `test_loss`
        # accumulators and collected logits it never returned — removed.
        num_correct_preds = 0
        num_samples = 0
        predictions_list = []
        labels_list = []
        for step, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
            # Load and feed data to model
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)

            outputs = model(input_ids, attention_masks)
            logits = outputs[0]

            predictions = torch.argmax(logits, dim=1)

            num_correct_preds += torch.sum(predictions == labels.data)
            num_samples += predictions.shape[0]

            predictions_list.append(predictions)
            labels_list.append(labels)
            torch.cuda.empty_cache()

    test_accuracy = num_correct_preds / num_samples
    performance = {}
    performance['test_accuracy'] = test_accuracy

    print(f' test accuracy: {test_accuracy:.6f}')

    return performance, torch.cat(predictions_list), torch.cat(labels_list)
Пример #3
0
def bert_train_val(model, dataloaders, starting_epoch, optimizer, scheduler,
                   epochs, device):
    """Train and validate a BERT-style classifier.

    Args:
        model: Hugging Face model; its output exposes ``.loss``/``.logits``.
        dataloaders: dict with 'train_dataloader' and 'val_dataloader';
            batches are ``(input_ids, attention_masks, labels)``.
        starting_epoch: epoch index to resume from.
        optimizer: torch optimizer.
        scheduler: learning-rate scheduler stepped once per batch.
        epochs: total number of epochs (exclusive upper bound).
        device: torch device the batches are moved to.

    Returns:
        dict with per-epoch 'train_loss', 'val_loss', 'train_accuracy',
        'val_accuracy' histories and 'num_epochs'.
    """
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()

    # Define running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_acc_history = []
    val_acc_history = []
    # ROBUSTNESS: build the result dict up front (it shares the live history
    # lists), so it exists even if starting_epoch >= epochs.
    performance_history = {
        'train_loss': train_loss_history,
        'val_loss': val_loss_history,
        'train_accuracy': train_acc_history,
        'val_accuracy': val_acc_history,
        'num_epochs': epochs
    }

    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0
        train_acc = 0
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to model
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)

            model.zero_grad()

            outputs = model(input_ids,
                            labels=labels,
                            attention_mask=attention_masks)
            loss = outputs.loss
            logits = outputs.logits

            train_loss += loss.item()

            logits = logits.detach().cpu().numpy()
            labels = labels.to('cpu').numpy()

            predictions = np.argmax(logits, axis=1).flatten()

            # Vectorized accuracy (replaces the original manual counting loop)
            batch_accuracy = np.mean(predictions == labels)
            train_acc += batch_accuracy

            if step % 100 == 0:
                print("Epoch: ", epoch + 1, "/", epochs, "Batch: ", step + 1,
                      "/", len(dataloaders['train_dataloader']), "Loss: ",
                      train_loss / (step + 1), "Accuracy: ", batch_accuracy)

            loss.backward()
            # Apply gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimizer/Learning rate scheduler step
            optimizer.step()
            scheduler.step()

            torch.cuda.empty_cache()

        # Loss and accuracy results by epoch
        end_epoch_time = time.time()
        epoch_train_accuracy = train_acc / len(dataloaders['train_dataloader'])
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_acc_history.append(epoch_train_accuracy)

        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, train accuracy: {epoch_train_accuracy:.6f}, train time:{epoch_train_time}'
        )

        # Switch to evaluation mode and run validation
        print("Validating...")

        start_val_time = time.time()
        # ROBUSTNESS: defined even if the val loader is empty
        end_val_time = start_val_time
        model.eval()
        val_loss = 0
        val_acc = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                # Load and feed data to model
                input_ids = batch[0].to(device)
                attention_masks = batch[1].to(device)
                labels = batch[2].to(device)

                # NOTE: the original called model.zero_grad() here; it is a
                # no-op under torch.no_grad() and has been removed.
                outputs = model(input_ids,
                                labels=labels,
                                attention_mask=attention_masks)
                loss = outputs.loss
                logits = outputs.logits

                val_loss += loss.item()

                logits = logits.detach().cpu().numpy()
                labels = labels.to('cpu').numpy()

                predictions = np.argmax(logits, axis=1).flatten()

                batch_accuracy = np.mean(predictions == labels)
                val_acc += batch_accuracy

                torch.cuda.empty_cache()
                end_val_time = time.time()

        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_acc = val_acc / len(dataloaders['val_dataloader'])
        val_loss_history.append(epoch_val_loss)
        val_acc_history.append(epoch_val_acc)

        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, val accuracy: {epoch_val_acc:.6f}, val_time: {epoch_val_time}'
        )

        # Save model checkpoint at end of train_val run, also saves performance history
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint,
                            f"./BERTcheckpoint_{checkpoint['epoch']}.pth.tar")

    # BUG FIX: the original printed "Training Finished" inside the epoch
    # loop (once per epoch); it belongs after all epochs complete.
    print("")
    print("Training Finished")

    return performance_history
def gpt2_train_val(model, dataloaders, tokenizer, starting_epoch, optimizer,
                   scheduler, epochs, device):
    """Train and validate a GPT-2-style language model.

    Args:
        model: causal LM; ``model(input_ids, labels=input_ids)`` returns a
            tuple whose first element is the loss; also exposes ``generate``.
        dataloaders: dict with 'train_dataloader' and 'val_dataloader';
            each batch is a tensor of token ids moved directly to `device`.
        tokenizer: used to decode generated samples for progress display.
        starting_epoch: epoch index to resume from.
        optimizer: torch optimizer.
        scheduler: learning-rate scheduler stepped once per batch.
        epochs: total number of epochs (exclusive upper bound).
        device: torch device the batches are moved to.

    Returns:
        dict with per-epoch 'train_loss', 'val_loss', 'train_perplexity',
        'val_perplexity' histories and 'num_epochs'.
    """
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()

    # Define running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_perplexity_history = []
    val_perplexity_history = []
    # ROBUSTNESS: build the result dict up front (it shares the live history
    # lists), so it exists even if starting_epoch >= epochs.
    performance_history = {
        'train_loss': train_loss_history,
        'val_loss': val_loss_history,
        'train_perplexity': train_perplexity_history,
        'val_perplexity': val_perplexity_history,
        'num_epochs': epochs
    }

    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0      # epoch total, used for the epoch average below
        running_loss = 0.0  # rolling window for the 200-step progress print
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to model (LM objective: labels == inputs)
            input_ids = batch.to(device)
            model.zero_grad()
            outputs = model(input_ids, labels=input_ids)

            loss = outputs[0]
            batch_loss = loss.item()
            train_loss += batch_loss
            running_loss += batch_loss

            # BUG FIX: the original reset `train_loss` to 0 here, which
            # corrupted the epoch-average loss computed after the loop.
            # The rolling window now uses a separate accumulator.
            if step % 200 == 199:
                print("Epoch:", epoch + 1, "/", epochs, "Batch:", step + 1,
                      "/", len(dataloaders['train_dataloader']), "Loss",
                      running_loss / 200)
                running_loss = 0.0

            # Generates a model output including special tokens in order to
            # visualise the training process and model learning
            if step % 100 == 0 and step != 0:
                model.eval()
                samples = model.generate(  # decoder_start_token_id=50258,
                    bos_token_id=50257,
                    do_sample=True,
                    top_k=50,
                    max_length=50,
                    min_length=15,
                    top_p=0.95,
                    num_return_sequences=1,
                    # BUG FIX: was misspelled 'repition_penalty' and thus
                    # silently ignored by generate().
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=2,
                    temperature=1.1)

                for i, sample in enumerate(samples):
                    print("{}".format(
                        tokenizer.decode(sample, skip_special_tokens=False)))

                # Return to train mode before back-propagating
                model.train()

            loss.backward()
            # Apply gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimizer/Learning rate scheduler step
            optimizer.step()
            scheduler.step()

            torch.cuda.empty_cache()

        # Loss and perplexity results by epoch
        end_epoch_time = time.time()
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_perplexity = torch.exp(torch.tensor(epoch_train_loss))
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_perplexity_history.append(epoch_train_perplexity)

        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, train ppl: {epoch_train_perplexity:.6f}, train time:{epoch_train_time}'
        )

        # Switch to evaluation mode and run validation
        print("Validating...")

        start_val_time = time.time()
        # ROBUSTNESS: defined even if the val loader is empty
        end_val_time = start_val_time
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                # BUG FIX: the train loop and test_generate consume the whole
                # batch tensor; the original's batch[0] took only one row.
                input_ids = batch.to(device)
                outputs = model(input_ids, labels=input_ids)
                loss = outputs[0]
                val_loss += loss.item()

                torch.cuda.empty_cache()
                end_val_time = time.time()

        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_perplexity = torch.exp(torch.tensor(epoch_val_loss))
        val_loss_history.append(epoch_val_loss)
        val_perplexity_history.append(epoch_val_perplexity)

        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, val ppl: {epoch_val_perplexity:.6f}, val_time: {epoch_val_time}'
        )

        # Save model checkpoint at end of train_val run, also saves performance history
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint,
                            f"./checkpoint_{checkpoint['epoch']}.pth.tar")

    # BUG FIX: the original printed "Training Finished" inside the epoch
    # loop (once per epoch); it belongs after all epochs complete.
    print("")
    print("Training Finished")

    return performance_history
Пример #5
0
def test_generate(model, tokenizer, dataloaders, device):
    """Score the language model on the test split and its generations.

    Computes mean test loss / perplexity over the test dataloader, then
    generates one sample per test sequence (conditioned on the genre token
    taken from the start of each decoded reference) and scores the samples
    against the references with corpus BLEU under three weight settings.

    Returns:
        dict with 'mean_test_loss', 'mean_test_perplexity' and the three
        BLEU scores.
    """
    model.eval()
    set_seed()

    with torch.no_grad():
        test_loss = 0
        plots = []
        references = []

        test_loader = dataloaders['test_dataloader']
        for step, batch in tqdm(enumerate(test_loader),
                                total=len(test_loader)):

            input_ids = batch.to(device)
            outputs = model(input_ids, labels=input_ids)
            loss, logits = outputs[:2]
            test_loss += loss.item()

            # Decode each sequence: keep the full text as a BLEU reference
            # and its leading token as the genre used to seed generation.
            genre_list = []
            for encoded in input_ids:
                decoded = tokenizer.decode(encoded,
                                           skip_special_tokens=True)
                references.append([decoded])
                genre_list.append(decoded.split()[0])

            # Generate one sample per reference, conditioned on its genre.
            plots += generate_text(model,
                                   tokenizer,
                                   device,
                                   num_samples=len(input_ids),
                                   input_genres=genre_list)

            torch.cuda.empty_cache()

        # Smoothing function for BLEU score
        smoother = SmoothingFunction()

        # BLEU score with default settings
        bleu_score_default = corpus_bleu(references,
                                         plots,
                                         smoothing_function=smoother.method1)

        # BLEU score with modified weights to penalize higher n-gram precision
        bleu_score_modified1 = corpus_bleu(references,
                                           plots,
                                           weights=(0.5, 0.25, 0.25, 0),
                                           smoothing_function=smoother.method1)
        bleu_score_modified2 = corpus_bleu(references,
                                           plots,
                                           weights=(0.5, 0.5),
                                           smoothing_function=smoother.method1)

        # Loss and perplexity results
        mean_test_loss = test_loss / len(test_loader)
        mean_test_perplexity = torch.exp(
            torch.tensor(mean_test_loss)).cpu().numpy()

        print(
            f'test loss: {mean_test_loss:.6f}, test ppl: {mean_test_perplexity:.6f}, bleu score default:{bleu_score_default}, bleu score modified 1: {bleu_score_modified1}, bleu score modified 2: {bleu_score_modified2}'
        )

        # Save test_performance
        test_performance = {
            'mean_test_loss': mean_test_loss,
            'mean_test_perplexity': mean_test_perplexity,
            'bleu_score_default': bleu_score_default,
            'bleu_score_modified1': bleu_score_modified1,
            'bleu_score_modified2': bleu_score_modified2
        }
    return test_performance