Example #1
    def __init__(
        self,
        args,
        batch_size=None,
        device=None,
        early_stop=None,
        learning_rate=None,
    ):
        super().__init__()
        if args is not None:
            # values provided via args override the keyword arguments
            if args.batch_size is not None:
                batch_size = args.batch_size
            if args.device is not None:
                device = args.device
            if args.early_stop is not None:
                early_stop = args.early_stop
            if args.learning_rate is not None:
                learning_rate = args.learning_rate
        if batch_size is None:
            batch_size = batch_size_default
        if device is None:
            device = device_default
        if early_stop is None:
            early_stop = early_stop_default
        if learning_rate is None:
            learning_rate = learning_rate_default
        self.batch_size = batch_size
        self.device = get_device(device, gpuid_default)
        self.early_stop = early_stop
        self.learning_rate = learning_rate
        self.clip_norm = clip_norm_default
        self.log_per_batch = log_per_batch_default
        self.n_chars = n_chars_default
        self.char_dim = char_dim_default
        self.log_name = log_name_default
        self.train_url = train_url_default
        self.dev_url = dev_url_default
        self.test_url = test_url_default
        self.n_epochs = n_epochs_default
        self.models_path = models_path_default

        # log the parameters
        self.logger = get_logger(self.log_name)
        self.logger.info("batch_size is: %d", self.batch_size)
        self.logger.info("device is: %s", self.device.type)
        self.logger.info("early_stop is: %d", self.early_stop)
        self.logger.info("learning_rate is: %.5f", self.learning_rate)
Example #2
def train(n_epochs=30,
          embedding_url=None,
          char_feat_dim=50,
          freeze=False,
          train_url=TRAIN_URL,
          dev_url=DEV_URL,
          test_url=None,
          max_region=10,
          learning_rate=0.001,
          batch_size=100,
          early_stop=5,
          clip_norm=5,
          device='auto',
          save_only_best=True
          ):
    """ Train deep exhaustive model, Sohrab et al. 2018 EMNLP

    Args:
        n_epochs: number of epochs
        embedding_url: url to pretrained embedding file; set to None to use randomly initialized embeddings
        char_feat_dim: size of the character-level feature
        freeze: whether to freeze the embedding layer
        train_url: url to train data
        dev_url: url to dev data
        test_url: url to test data; set to None to skip the final evaluation
        max_region: max entity region size
        learning_rate: learning rate
        batch_size: batch size
        early_stop: stop training after this many epochs without dev F1 improvement; set to 0 to disable
        clip_norm: max norm for gradient clipping; set to 0 to disable
        device: device for torch
        save_only_best: keep only the checkpoint with the best dev performance
    """

    # print arguments
    arguments = json.dumps(vars(), indent=2)
    print("exhaustive model is training with arguments", arguments)
    device = get_device(device)

    train_set = ExhaustiveDataset(train_url, device=device, max_region=max_region)
    train_loader = DataLoader(train_set, batch_size=batch_size, drop_last=False,
                              collate_fn=train_set.collate_func)

    vocab = ju.load(VOCAB_URL)
    n_words = len(vocab)
    char_vocab = ju.load(VOCAB_URL.replace('vocab', 'char_vocab'))
    n_chars = len(char_vocab)

    model = ExhaustiveModel(
        hidden_size=200,
        n_tags=train_set.n_tags + 1,
        char_feat_dim=char_feat_dim,
        embedding_url=embedding_url,
        bidirectional=True,
        max_region=max_region,
        n_embeddings=n_words,
        n_chars=n_chars,
        embedding_dim=200,
        freeze=freeze
    )

    if device.type == 'cuda':
        print("using gpu,", torch.cuda.device_count(), "gpu(s) available!\n")
        # model = nn.DataParallel(model)
    else:
        print("using cpu\n")
    model = model.to(device)

    criterion = F.cross_entropy
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    max_f1, max_f1_epoch, cnt = 0, 0, 0
    # ignore the padding tag when calculating the loss
    tag_weights = torch.Tensor([1] * train_set.n_tags + [0]).to(device)
    best_model_url = None

    # train and evaluate model
    for epoch in range(n_epochs):
        # switch to train mode
        model.train()
        batch_id = 0
        for data, labels, _ in train_loader:
            optimizer.zero_grad()
            outputs = model.forward(*data)
            # use weight parameter to skip padding part
            loss = criterion(outputs, labels, weight=tag_weights)
            loss.backward()
            # gradient clipping
            if clip_norm > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_norm)
            optimizer.step()

            endl = '\n' if batch_id % LOG_PER_BATCH == 0 else '\r'
            sys.stdout.write("epoch #%d, batch #%d, loss: %.6f, %s%s" %
                             (epoch, batch_id, loss.item(), datetime.now().strftime("%X"), endl))
            sys.stdout.flush()
            batch_id += 1

        cnt += 1
        # metrics on development set
        dev_metrics = evaluate(model, dev_url)
        if dev_metrics['f1'] > max_f1:
            max_f1 = dev_metrics['f1']
            max_f1_epoch = epoch
            if save_only_best and best_model_url:
                os.remove(best_model_url)
            best_model_url = from_project_root(
                "data/model/exhaustive_model_epoch%d_%f.pt" % (epoch, max_f1))
            torch.save(model, best_model_url)
            cnt = 0

        print("maximum of f1 value: %.6f, in epoch #%d\n" % (max_f1, max_f1_epoch))
        if cnt >= early_stop > 0:
            break
    print('\n')

    if test_url and best_model_url:
        model = torch.load(best_model_url)
        print("best model url:", best_model_url)
        print("evaluating on test dataset:", test_url)
        evaluate(model, test_url)

    print(arguments)
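A hedged invocation sketch for `train` above; the keyword values simply restate the documented defaults, and TRAIN_URL / DEV_URL are the module-level constants the signature already references.

if __name__ == '__main__':
    # assumed entry point; arguments mirror the defaults documented above
    train(
        n_epochs=30,
        embedding_url=None,   # None -> randomly initialized embeddings
        char_feat_dim=50,
        train_url=TRAIN_URL,
        dev_url=DEV_URL,
        test_url=None,        # skip the final test-set evaluation
        device='auto',        # get_device presumably resolves 'auto' to cuda or cpu
        save_only_best=True,
    )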
Example #3
def train_end2end(n_epochs=80,
                  embedding_url=EMBED_URL,
                  char_feat_dim=50,
                  freeze=FREEZE_WV,
                  train_url=TRAIN_URL,
                  dev_url=DEV_URL,
                  test_url=None,
                  learning_rate=LR,
                  batch_size=BATCH_SIZE,
                  early_stop=EARLY_STOP,
                  clip_norm=MAX_GRAD_NORM,
                  bsl_model_url=None,
                  gamma=0.3,
                  device='auto',
                  save_only_best=True):
    """ Train deep exhaustive model, trained best model will be saved at 'data/model/'

    Args:
        n_epochs: number of epochs
        embedding_url: url to pre-trained embedding file; set to None to use randomly initialized embeddings
        char_feat_dim: size of the character-level feature
        freeze: whether to freeze the embedding layer
        train_url: url to train data
        dev_url: url to dev data
        test_url: url to test data; set to None to skip the final evaluation
        learning_rate: learning rate
        batch_size: batch size
        early_stop: stop training after this many epochs without dev F1 improvement; set to 0 to disable
        clip_norm: max norm for gradient clipping; set to 0 to disable
        bsl_model_url: url to a pre-trained binary sequence labeler; set to None to train it jointly
        gamma: weight of the region classification loss in the total loss
        device: device for torch
        save_only_best: keep only the checkpoint with the best dev performance
    """

    # print arguments
    arguments = json.dumps(vars(), indent=2)
    print("arguments", arguments)
    start_time = datetime.now()

    device = get_device(device)
    train_set = End2EndDataset(train_url, device=device)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              drop_last=False,
                              collate_fn=train_set.collate_func)

    model = End2EndModel(hidden_size=200,
                         lstm_layers=1,
                         n_tags=N_TAGS,
                         char_feat_dim=char_feat_dim,
                         embedding_url=embedding_url,
                         bidirectional=True,
                         n_embeddings=200000,
                         embedding_dim=200,
                         freeze=freeze)

    if device.type == 'cuda':
        print("using gpu,", torch.cuda.device_count(), "gpu(s) available!\n")
        # model = nn.DataParallel(model)
    else:
        print("using cpu\n")
    model = model.to(device)
    bsl_model = torch.load(bsl_model_url) if bsl_model_url else None

    criterion = F.cross_entropy
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    cnt = 0
    max_f1, max_f1_epoch = 0, 0
    best_model_url = None
    for epoch in range(n_epochs):
        # switch to train mode
        model.train()
        batch_id = 0
        for data, sentence_labels, region_labels in train_loader:
            optimizer.zero_grad()
            pred_region_labels, pred_sentence_labels = model.forward(
                *data, sentence_labels)
            classification_loss = criterion(pred_region_labels, region_labels)
            bsl_loss = criterion(pred_sentence_labels, sentence_labels)
            if bsl_model_url:
                # a pre-trained sequence labeler is given, so train the region classifier alone
                loss = classification_loss
            else:
                # jointly train the region classifier and the binary sequence labeler as multi-task learning
                loss = gamma * classification_loss + (1 - gamma) * bsl_loss
            loss.backward()

            # gradient clipping
            if clip_norm > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_norm=clip_norm)
            optimizer.step()
            if batch_id % LOG_PER_BATCH == 0:
                print("epoch #%d, batch #%d, loss: %.12f, %s" %
                      (epoch, batch_id, loss.item(),
                       datetime.now().strftime("%X")))
            batch_id += 1

        cnt += 1
        # evaluate the model on the development dataset
        precision, recall, f1 = evaluate_e2e(model, dev_url,
                                             bsl_model).values()
        if f1 > max_f1:
            max_f1, max_f1_epoch = f1, epoch
            name = 'split' if bsl_model else 'end2end'
            if save_only_best and best_model_url:
                os.remove(best_model_url)
            best_model_url = from_project_root(
                "data/model/%s_model_epoch%d_%f.pt" % (name, epoch, f1))
            torch.save(model, best_model_url)
            cnt = 0

        # if test_url:
        #     evaluate_e2e(model, test_url, bsl_model)

        print("maximum of f1 value: %.6f, in epoch #%d" %
              (max_f1, max_f1_epoch))
        print("training time:", str(datetime.now() - start_time).split('.')[0])
        print(datetime.now().strftime("%c\n"))

        if cnt >= early_stop > 0:
            break

    if test_url and best_model_url:
        best_model = torch.load(best_model_url)
        print("best model url:", best_model_url)
        print("evaluating on test dataset:", test_url)
        evaluate_e2e(best_model, test_url, bsl_model)

    print(arguments)
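A hedged sketch of the two ways `train_end2end` above can be invoked; the checkpoint path is a placeholder, not a file shipped with the project.

if __name__ == '__main__':
    # joint multi-task training: the total loss mixes the region
    # classification loss and the sequence labeling loss via gamma
    train_end2end(bsl_model_url=None, gamma=0.3)

    # alternatively, train only the region classifier on top of a
    # pre-trained binary sequence labeler (placeholder path)
    # train_end2end(bsl_model_url="data/model/bsl_model.pt")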