Example 1
def main():
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Loading dataframes from CSV; can sample some rows for easy testing.
    # While running full simulations, make sure sample_ratio is set to None.
    train_df = dataset.load_df(dataset_fname=config.train_fname,
                               sample_ratio=None)
    valid_df = dataset.load_df(dataset_fname=config.valid_fname,
                               sample_ratio=None)

    # Dataset objects for the dataloaders
    train_set = dataset.dataset(train_df, max_len=config.MAX_SEQ_LEN)
    valid_set = dataset.dataset(valid_df, max_len=config.MAX_SEQ_LEN)

    #creating dataloader
    #Training set should be shuffled
    train_loader = DataLoader(train_set,
                              shuffle=True,
                              batch_size=config.BATCH_SIZE,
                              num_workers=config.NUM_CPU_WORKERS)
    #Validation set should NOT be shuffled
    test_loader = DataLoader(valid_set,
                             shuffle=False,
                             batch_size=config.BATCH_SIZE,
                             num_workers=config.NUM_CPU_WORKERS)

    #creating BERT model
    if config.TRAINED_MODEL_FNAME:
        bert_model = torch.load(config.TRAINED_MODEL_FNAME)
        print(
            f'Loaded trained model: {bert_model} from file: {config.TRAINED_MODEL_FNAME}'
        )
    else:
        bert_model = model.bert_classifier(
            freeze_bert=config.BERT_LAYER_FREEZE)
        print(f"created NEW TRANSFORMER model for finetuning: {bert_model}")
    bert_model.to(device)

    # Multi GPU setting
    if config.MULTIGPU:
        device_ids = [0, 1, 2, 3]  # huggingface allows parallelizing only up to 4 cards
        bert_model = nn.DataParallel(bert_model, device_ids=device_ids)
        print(f'Model parallelized on the following cards: {device_ids}')

    #loss function (with weights)
    class_weights = get_class_weigts(train_df)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    # criterion = nn.NLLLoss(weight=class_weights)

    #optimizer and scheduler
    # optimizer = optim.Adam(bert_model.parameters(), lr=config.LR)
    # scheduler = None

    param_optimizer = list(bert_model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(train_set) / config.BATCH_SIZE * config.NUM_EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=config.LR)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    train_model(bert_model,
                criterion,
                optimizer,
                scheduler,
                train_loader,
                test_loader,
                print_every=config.PRINT_EVERY,
                n_epochs=config.NUM_EPOCHS,
                device=device,
                start_training_epoch_at=config.START_TRAINING_EPOCH_AT,
                save_model=True,
                save_every=config.SAVE_EVERY)

    auc, f1 = test(bert_model,
                   test_loader,
                   device=device,
                   pred_save_fname=True)
    print(
        f'After ALL epochs: validation AUC: {round(auc,4)} and F1: {round(f1,4)}'
    )
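The weighted loss in this example relies on a get_class_weigts helper whose implementation is not shown. A minimal sketch of what such a helper might look like for this call site, assuming the training dataframe has an integer-coded "label" column and inverse-frequency weighting (both the column name and the weighting scheme are assumptions, not the author's code):

import numpy as np

def get_class_weigts(df, label_col="label"):
    # Count samples per class; assumes integer labels 0..C-1 in `label_col`.
    counts = df[label_col].value_counts().sort_index().to_numpy()
    # Inverse-frequency weights, normalized so they average to 1 across classes.
    weights = counts.sum() / (len(counts) * counts)
    return weights.astype(np.float32)

The returned array works with nn.CrossEntropyLoss(weight=...) as long as its order matches the class indices produced by the dataset.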
Example 2
def main():
    # Creating instances of training and validation set
    train_set = dataset.dataset(dataset_fname=config.train_fname,
                                model_name=config.MODEL_NAME,
                                max_len=config.MAX_SEQ_LEN,
                                sample_ratio=None,
                                is_lower=config.IS_LOWER)
    test_set = dataset.dataset(dataset_fname=config.test_fname,
                               model_name=config.MODEL_NAME,
                               max_len=config.MAX_SEQ_LEN,
                               sample_ratio=None,
                               is_lower=config.IS_LOWER)

    #creating dataloader
    train_loader = DataLoader(train_set,
                              shuffle=True,
                              batch_size=config.BATCH_SIZE,
                              num_workers=config.NUM_CPU_WORKERS)
    test_loader = DataLoader(test_set,
                             shuffle=False,
                             batch_size=config.BATCH_SIZE,
                             num_workers=config.NUM_CPU_WORKERS)

    #creating BERT model
    bert_model = model.bert_classifier(
        model_name=config.MODEL_NAME,
        context_vector_size=config.CONTEXT_VECTOR_SIZE,
        freeze_bert=config.BERT_LAYER_FREEZE)
    bert_model.to(config.DEVICE)
    print(f"created NEW TRANSFORMER model for finetuning: {bert_model}")

    #loss function
    criterion = nn.CrossEntropyLoss()
    # criterion = nn.NLLLoss()

    #optimizer and scheduler
    optimizer = optim.Adam(bert_model.parameters(), lr=config.LR)
    scheduler = None

    # param_optimizer = list(bert_model.named_parameters())
    # no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    # optimizer_parameters = [
    #     {
    #         "params": [
    #             p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
    #         ],
    #         "weight_decay": 0.001,
    #     },
    #     {
    #         "params": [
    #             p for n, p in param_optimizer if any(nd in n for nd in no_decay)
    #         ],
    #         "weight_decay": 0.0,
    #     },
    # ]
    # num_train_steps = int(len(train_set) / config.BATCH_SIZE * config.NUM_EPOCHS)
    # optimizer = AdamW(optimizer_parameters, lr=config.LR)
    # scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # Multi GPU setting
    if config.MULTIGPU:
        device_ids = [0, 1, 2, 3]  # huggingface allows parallelizing only up to 4 cards
        bert_model = nn.DataParallel(bert_model, device_ids=device_ids)
        print(f'Model parallelized on the following cards: {device_ids}')

    train_model(bert_model,
                criterion,
                optimizer,
                scheduler,
                train_loader,
                test_loader,
                print_every=config.PRINT_EVERY,
                n_epochs=config.NUM_EPOCHS,
                device=config.DEVICE)
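Every example instantiates model.bert_classifier, which is defined elsewhere and not shown here. A minimal sketch of such a module, assuming a Hugging Face AutoModel backbone, [CLS]-token pooling, and a single linear head (the pooling strategy, head shape, and number of classes are illustrative assumptions, not the original implementation):

import torch.nn as nn
from transformers import AutoModel

class bert_classifier(nn.Module):
    def __init__(self, model_name="bert-base-uncased", context_vector_size=768,
                 n_classes=2, freeze_bert=False):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        if freeze_bert:
            # Freeze the transformer so only the classification head is trained.
            for p in self.bert.parameters():
                p.requires_grad = False
        self.classifier = nn.Linear(context_vector_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Use the [CLS] token representation as the context vector.
        cls_vec = outputs.last_hidden_state[:, 0, :]
        return self.classifier(cls_vec)

Freezing the backbone (config.BERT_LAYER_FREEZE) turns the run into a feature-extraction setup; leaving it unfrozen fine-tunes the full transformer.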
Example 3
def main():
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Creating instances of training and validation set
    train_df, valid_df = dataset.get_train_valid_df(
        dataset_fname=config.train_fname,
        sample_ratio=config.SAMPLE_RATIO,
        valid_ratio=config.VALIDATION_SET_RATIO,
        save_dfs=False)

    # Dataset objects for the dataloaders
    train_set = dataset.dataset(train_df, max_len=config.MAX_SEQ_LEN)
    valid_set = dataset.dataset(valid_df, max_len=config.MAX_SEQ_LEN)

    #creating dataloader
    train_loader = DataLoader(train_set,
                              shuffle=True,
                              batch_size=config.BATCH_SIZE,
                              num_workers=config.NUM_CPU_WORKERS)
    # Validation set should NOT be shuffled
    test_loader = DataLoader(valid_set,
                             shuffle=False,
                             batch_size=config.BATCH_SIZE,
                             num_workers=config.NUM_CPU_WORKERS)

    #creating BERT model
    if config.TRAINED_MODEL_FNAME:
        bert_model = torch.load(config.TRAINED_MODEL_FNAME)
        print(
            f'Loaded trained model: {bert_model} from file: {config.TRAINED_MODEL_FNAME}'
        )
    else:
        bert_model = model.bert_classifier(
            freeze_bert=config.BERT_LAYER_FREEZE)
        print(f"created NEW BERT model for finetuning: {bert_model}")
    bert_model.to(device)

    # Multi GPU setting
    if config.MULTIGPU:
        bert_model = nn.DataParallel(bert_model, device_ids=[0, 1, 2, 3])

    #loss function
    class_weights_dict = get_class_weigts(train_df)
    class_weights_dict = {
        c: torch.tensor(w).to(device).float()
        for c, w in class_weights_dict.items()
    }
    criterion = [
        nn.CrossEntropyLoss(weight=class_weights_dict[c])
        for c in sorted(class_weights_dict.keys())
    ]

    #optimizer and scheduler
    # optimizer = optim.Adam(bert_model.parameters(), lr=config.LR)
    param_optimizer = list(bert_model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    # print(optimizer_parameters)

    num_train_steps = int(
        len(train_set) / config.BATCH_SIZE * config.NUM_EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=config.LR)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    train_model(bert_model,
                criterion,
                optimizer,
                scheduler,
                train_loader,
                test_loader,
                print_every=config.PRINT_EVERY,
                n_epochs=config.NUM_EPOCHS,
                device=device,
                start_training_epoch_at=config.START_TRAINING_EPOCH_AT)
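One detail worth noting about the scheduler setup in these examples: int(len(train_set) / config.BATCH_SIZE * config.NUM_EPOCHS) truncates, so num_training_steps slightly undercounts whenever the last batch of an epoch is partial. A small sketch of a step count that matches what the DataLoader actually yields, reusing the names from this example (assumed to be in scope):

import math

# One optimizer step per batch; ceil() counts the final partial batch as well.
steps_per_epoch = math.ceil(len(train_set) / config.BATCH_SIZE)
num_train_steps = steps_per_epoch * config.NUM_EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

The difference is small, but with linear decay it determines whether the learning rate actually reaches zero on the final step.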
Example 4
def main():
    max_samples = None
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    X_train, X_valid, y_train, y_valid = load_Xy(max_samples)

    #for dataset loader
    train_set = dataset.dataset(X_train, y_train, max_len=config.MAX_SEQ_LEN)
    valid_set = dataset.dataset(X_valid, y_valid, max_len=config.MAX_SEQ_LEN)

    #creating dataloader
    #Training set should be shuffled
    train_loader = DataLoader(train_set,
                              shuffle=True,
                              batch_size=config.BATCH_SIZE,
                              num_workers=config.NUM_CPU_WORKERS)
    #Validation set should NOT be shuffled
    test_loader = DataLoader(valid_set,
                             shuffle=False,
                             batch_size=config.BATCH_SIZE,
                             num_workers=config.NUM_CPU_WORKERS)

    #creating BERT model
    if config.TRAINED_MODEL_FNAME:
        bert_model = torch.load(config.TRAINED_MODEL_FNAME)
        print(
            f'Loaded trained model: {bert_model} from file: {config.TRAINED_MODEL_FNAME}'
        )
    else:
        bert_model = model.bert_classifier(
            freeze_bert=config.BERT_LAYER_FREEZE)
        print(f"created NEW TRANSFORMER model for finetuning: {bert_model}")
    bert_model.to(device)

    # Multi GPU setting
    if config.MULTIGPU:
        device_ids = [0, 1, 2, 3]  # huggingface allows parallelizing only up to 4 cards
        bert_model = nn.DataParallel(bert_model, device_ids=device_ids)
        print(f'Model parallelized on the following cards: {device_ids}')

    #loss function (with weights)
    class_weights = get_class_weigts(y_train)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    param_optimizer = list(bert_model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(train_set) / config.BATCH_SIZE * config.NUM_EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=config.LR)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    train_model(bert_model,
                criterion,
                optimizer,
                scheduler,
                train_loader,
                test_loader,
                print_every=config.PRINT_EVERY,
                n_epochs=config.NUM_EPOCHS,
                device=device,
                save_model=True,
                save_every=config.SAVE_EVERY)

    acc, auc, p, r, f1 = test(bert_model,
                              test_loader,
                              device=device,
                              pred_save_fname=True)
    print(f'After ALL epochs: validation ACC: {acc:.4f} and AUC: {auc:.4f}, '
          f'P: {p:.4f}, R: {r:.4f}, F1: {f1:.4f}')
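The test helper called above is not included in these listings (Example 1 calls a variant that returns only AUC and F1). A minimal sketch of an evaluation loop that returns the same five metrics, assuming binary labels and batches that expose "input_ids", "attention_mask", and "labels" keys (the batch format and the sklearn-based metrics are assumptions; the original's pred_save_fname handling is omitted):

import torch
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score,
                             recall_score, f1_score)

@torch.no_grad()
def test(model, loader, device, pred_save_fname=None):
    model.eval()
    all_labels, all_preds, all_probs = [], [], []
    for batch in loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        logits = model(input_ids, attention_mask)
        # Probability of the positive class for AUC; argmax for the other metrics.
        probs = torch.softmax(logits, dim=-1)[:, 1]
        preds = logits.argmax(dim=-1)
        all_labels.extend(labels.cpu().tolist())
        all_preds.extend(preds.cpu().tolist())
        all_probs.extend(probs.cpu().tolist())
    acc = accuracy_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_probs)
    p = precision_score(all_labels, all_preds)
    r = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    return acc, auc, p, r, f1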