Example #1
    def run(self):
        self.dataloader.run()
        self.INPUT_DIM = len(self.dataloader.SRC.vocab)
        self.OUTPUT_DIM = len(self.dataloader.TRG.vocab)

        self.define_model()

        print("Start Training ... ")
        best_valid_loss = float('inf')
        for epoch in range(config.TRAIN_EPOCHS):
            start_time = time.time()
            train_loss, train_bleu = self.train()
            print("Start Evaluation ... ")
            valid_loss, valid_bleu = self.evaluate()
            # train_loss = self.train()
            # valid_loss = self.evaluate()
            end_time = time.time()

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(self.model.state_dict(), 'tut3-model.pt')

            print(f"Epoch Num: {epoch}")
            epoch_time(start_time, end_time)
            print_loss(train_loss, valid_loss)
            print_bleu(train_bleu, valid_bleu)
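All of the examples in this collection call an epoch_time(start_time, end_time) helper to report how long an epoch took, and most of them unpack its result into whole minutes and remaining seconds. A minimal sketch of such a helper (an assumed implementation, not code taken from any of the projects shown here):

def epoch_time(start_time, end_time):
    # split an elapsed wall-clock interval into whole minutes and leftover seconds
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs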
Example #2
def prep_tag_files(
    src_file,
    save_path,
    src_tok,
    max_len,
    min_len,
):
    tagger = SequenceTagger.load("pos-fast")
    good_len_sentences = 0
    # save data to temporary file
    with open(os.path.join(save_path, "temp_src.txt"), "w") as src_sink:
        with open(os.path.join(save_path, "temp_trg.txt"), "w") as trg_sink:
            total_length = sum(1 for _ in open(src_file, "r"))
            print(f"total number of lines: {total_length}")
            start_time = time.time()
            for i in range(total_length):
                if i != 0 and i % 10000 == 0:
                    end_time = time.time()
                    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
                    print(f"Line {i}| Time: {epoch_mins}m {epoch_secs}s")
                    start_time = time.time()
                line = linecache.getline(src_file, i + 1)
                line, len_line = clean_and_tok(line, src_tok)
                if max_len or min_len:
                    if len_line >= min_len and len_line <= max_len:
                        good_len_sentences += 1
                        src_sink.write(" ".join(line) + "\n")
                        trg_sink.write(
                            " ".join(get_tags(line, tagger=tagger)) + "\n")

    keep_indices = [i for i in range(good_len_sentences)]
    print(f"Total number of examples {len(keep_indices)}")
    return keep_indices
Example #3
def do_train(model_instance, train_features, train_label, test_features):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Device: {}".format(device))
    optimizer = optim.Adam(model_instance.parameters(), weight_decay=0.01)
    criterion = nn.CrossEntropyLoss(weight=torch.Tensor([0.1, 0.4, 0.5]))

    model_instance = model_instance.to(device)
    criterion = criterion.to(device)

    for epoch in range(N_EPOCHS):
        start_time = time.time()
        epoch_iterator = generate_batches_train(BATCH_SIZE, train_features, train_label)

        train_loss, train_acc, train_f1 = train_epoch(model_instance, epoch_iterator, optimizer, criterion, device)

        end_time = time.time()

        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

        print(f'Epoch: {epoch + 1:02} | '
              f'Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | '
              f'Train Acc: {train_acc * 100:.2f}%| '
              f'Train F1 hate_class: {utils.get_hate_class_f1(train_f1) * 100:.2f}%')

    # Test process
    test_iterator = generate_batches_test(BATCH_SIZE, test_features)
    results_test, results_test_prob = test(model_instance, test_iterator, device)
    # Export result
    utils.export_result_submit(results_test, f'./submit/submit_combine.csv', './data-bin/05_sample_submission.csv')
Example #4
def do_train_model(type_embed, model_instance, model_name):
    file_log = open(f'./train-logs/log_{model_name}.txt', 'w', encoding='utf-8')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utils.print_and_write_log("Device: {}".format(device), file_log)
    optimizer = optim.Adam(model_instance.parameters(), weight_decay=0.01)
    criterion = nn.CrossEntropyLoss(weight=torch.Tensor([0.1, 0.4, 0.5]))

    model_instance = model_instance.to(device)
    criterion = criterion.to(device)

    best_valid_f1 = -float('inf')
    # Change valid set for other model
    load_dataset.valid_data_ids = load_dataset.get_valid_data_ids(shuffer=True)
    # Train process
    for epoch in range(N_EPOCHS):
        start_time = time.time()
        epoch_iterator = load_dataset.generate_batches_train(BATCH_SIZE, type_embed)
        epoch_iterator_valid = load_dataset.generate_batches_valid(BATCH_SIZE, type_embed,
                                                                   shuffler=False)
        train_loss, train_acc, train_f1 = train_epoch(model_instance, epoch_iterator, optimizer, criterion, device,
                                                      file_log=file_log)
        valid_loss, valid_acc, valid_f1 = evaluate_epoch(model_instance, epoch_iterator_valid, criterion, device,
                                                         file_log=file_log)

        end_time = time.time()

        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

        if utils.get_hate_class_f1(valid_f1) > best_valid_f1:
            utils.print_and_write_log(f'Current best {model_name} epoch {epoch}. Save model....', file_log)
            best_valid_f1 = utils.get_hate_class_f1(valid_f1)
            torch.save(model_instance.state_dict(), os.path.join(MODEL_PATH, model_name))

        utils.print_and_write_log(f'Epoch: {epoch + 1:02} | '
                                  f'Epoch Time: {epoch_mins}m {epoch_secs}s', file_log)
        utils.print_and_write_log(f'\tTrain Loss: {train_loss:.3f} | '
                                  f'Train Acc: {train_acc * 100:.2f}%| '
                                  f'Train F1 hate_class: {utils.get_hate_class_f1(train_f1) * 100:.2f}%', file_log)
        utils.print_and_write_log(f'\t Val. Loss: {valid_loss:.3f} |  '
                                  f'Val. Acc: {valid_acc * 100:.2f}%| '
                                  f'Val. F1 hate_class: {utils.get_hate_class_f1(valid_f1) * 100:.2f}%', file_log)

    # Test process
    test_iterator = load_dataset.generate_batches_test(BATCH_SIZE, type_embed)
    model_instance.load_state_dict(torch.load(os.path.join(MODEL_PATH, model_name)))
    results_test, results_test_prob = test(model_instance, test_iterator, device)
    utils.export_result_combine(results_test_prob, f'./submit-combine/{model_name}_test.prob.json')
    # Export result
    utils.export_result_submit(results_test, f'./submit/submit_{model_name}.csv', './data-bin/05_sample_submission.csv')

    # Export probs for sample in train data
    train_iterator_for_combine = load_dataset.generate_batches_for_combine(BATCH_SIZE, type_embed)
    results_probs = evaluate_epoch_export_prob(model_instance, train_iterator_for_combine, device)
    utils.export_result_combine(results_probs, f'./submit-combine/{model_name}.prob.json')

    file_log.close()
Example #5
    def train(self):
        print(self.model)
        print(
            f'The model has {self.model.count_params():,} trainable parameters'
        )
        best_valid_loss = float('inf')

        for epoch in range(self.params.num_epoch):
            self.model.train()
            epoch_loss = 0
            start_time = time.time()

            for batch in self.train_iter:
                # For each batch, first zero the gradients
                self.optimizer.zero_grad()
                source = batch.kor
                target = batch.eng

                # target sentence consists of <sos> and following tokens (except the <eos> token)
                output = self.model(source, target[:, :-1])[0]

                # ground truth sentence consists of tokens and <eos> token (except the <sos> token)
                output = output.contiguous().view(-1, output.shape[-1])
                target = target[:, 1:].contiguous().view(-1)
                # output = [(batch size * target length - 1), output dim]
                # target = [(batch size * target length - 1)]
                loss = self.criterion(output, target)
                loss.backward()

                # clip the gradients to prevent exploding gradients
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.params.clip)

                self.optimizer.step()

                # 'item' method is used to extract a scalar from a tensor which only contains a single value.
                epoch_loss += loss.item()

            train_loss = epoch_loss / len(self.train_iter)
            valid_loss = self.evaluate()

            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(self.model.state_dict(), self.params.save_model)

            print(
                f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}'
            )
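The count_params() call above (and count_parameters() in later examples) reports the number of trainable parameters. A common way to implement such a helper or method in PyTorch, given only as an assumed sketch rather than any project's actual code:

def count_parameters(model):
    # count the elements of every parameter tensor that requires gradients
    return sum(p.numel() for p in model.parameters() if p.requires_grad)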
Example #6
def train(train_data, eval_data, train_gloss_dict, eval_gloss_dict, model,
          optimizer, criterion, args):

    epochs = args.epochs
    gloss_bsz = args.gloss_bsz
    max_grad_norm = args.max_grad_norm
    logger = args.logger
    if args.multigpu:
        multigpu = args.multigpu
    else:
        multigpu = False
    print(f"The number of iteration for each epoch is {len(train_data)}")

    # record the gold labels of the evaluation data
    truth = []
    for data in eval_data:
        sense_ids_org = chain(*[list(sense_d.values()) for sense_d in data[4]])
        truth += sense_ids_org

    # training loop
    for epoch in range(epochs):
        logger.info(f"Epoch {epoch+1} initialized.")
        model_path = f"{args.checkpoint}/saved_checkpoint_{args.checkpoint_count}"

        start_time = time.time()

        model, optimizer, total_loss = train_one_epoch(train_data,
                                                       train_gloss_dict, model,
                                                       optimizer, criterion,
                                                       model_path, args)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # predict on the evaluation dataset
        preds = predict(eval_data, eval_gloss_dict, model)
        assert len(preds) == len(truth)
        eval_acc = np.mean(np.array(preds) == np.array(truth))
        eval_f1 = f1_score(truth, preds, average='weighted')

        logger.info(
            f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        logger.info(f'\tTrain Loss: {total_loss:.3f}')
        logger.info(f'\tEval. Acc: {eval_acc*100:.2f}%')
        logger.info(f'\tEval. F1 : {eval_f1*100:.2f}%')

        # Saving
        torch.save(
            model,
            f"{args.checkpoint}/saved_checkpoint_{args.checkpoint_count}")
        logger.info(
            f"Checkpoint saved at {args.checkpoint}/saved_checkpoint_{args.checkpoint_count}"
        )
        args.checkpoint_count += 1
Example #7
    def train(self):
        print(f'The model has {self.model.count_parameters():,} trainable parameters')

        best_valid_loss = float('inf')

        print(self.model)

        for epoch in range(self.config.num_epoch):
            self.model.train()

            epoch_loss = 0
            epoch_acc = 0

            start_time = time.time()

            for batch in self.train_iter:
                # For each batch, first zero the gradients
                self.optimizer.zero_grad()

                # if Field has include_lengths=False, batch.text is only padded numericalized tensor
                # if Field has include_lengths=True, batch.text is tuple(padded numericalized tensor, sentence length)
                input, input_lengths = batch.text
                predictions = self.model(input, input_lengths).squeeze(1)
                # predictions = [batch size, 1]; after squeeze(1): [batch size]

                loss = self.criterion(predictions, batch.label)
                acc = binary_accuracy(predictions, batch.label)

                loss.backward()
                self.optimizer.step()

                # 'item' method is used to extract a scalar from a tensor which only contains a single value.
                epoch_loss += loss.item()
                epoch_acc += acc.item()

            train_loss = epoch_loss / len(self.train_iter)
            train_acc = epoch_acc / len(self.train_iter)

            valid_loss, valid_acc = self.evaluate()

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(self.model.state_dict(), self.config.save_model)

            print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\tVal. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
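The loop above relies on a binary_accuracy(predictions, labels) helper. Assuming the model emits one raw logit per example (an assumption, not this project's verified code), a typical implementation is:

import torch

def binary_accuracy(preds, y):
    # squash logits with sigmoid, round to 0/1, and compare against the gold labels
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    return correct.sum() / len(correct)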
Example #8
def main_lm(args):
    # Get data and model
    train_iterator, valid_iterator, test_iterator, src, trg, vec =\
                                                        data.get_lm_data(args)
    model = utils.create_seq2seq_model(args, src, trg, vec)

    best_valid_loss = float('inf')
    best_valid_epoch = 0
    optimizer = optim.Adam(model.parameters())
    pad_idx = src.vocab.stoi['<pad>']
    criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

    # Main loop
    for epoch in range(args.num_epochs):
        start_time = time.time()

        train_loss = train_lm(args, model, train_iterator, optimizer,
                              criterion, args.grad_clip)
        valid_loss = evaluate_lm(model, valid_iterator, criterion)

        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            # make sure the directory for the checkpoint exists before saving
            folder = os.path.dirname(args.save_path)
            if folder and not os.path.exists(folder):
                os.makedirs(folder)
            best_valid_loss = valid_loss
            best_valid_epoch = epoch
            torch.save(model.state_dict(), args.save_path)

        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(
            f'\tTrain Loss: {train_loss:.4f} | Train PPL: {math.exp(train_loss):7.4f}'
        )
        print(
            f'\t Val. Loss: {valid_loss:.4f} |  Val. PPL: {math.exp(valid_loss):7.4f}'
        )

    # Post training eval on test
    model.load_state_dict(torch.load(args.save_path))
    test_loss = evaluate_lm(model, test_iterator, criterion)
    print('****RESULTS****')
    print(
        f'| Best Val. Loss: {best_valid_loss:.4f} | Best Val. PPL: {math.exp(best_valid_loss):7.4f} | At epoch: {best_valid_epoch} '
    )
    print(
        f'| Test Loss with best val model: {test_loss:.4f} | Test PPL: {math.exp(test_loss):7.4f} | At epoch: {best_valid_epoch} '
    )
Example #9
def runner(epochs, model, train_iterator, valid_iterator, optim, writer,
           config):
    clip = config["clip"]
    save_path = config["data"]["path"]
    model_name = config["model_name"]

    best_valid_loss = float('inf')

    for epoch in range(epochs):
        start_time = time.time()

        train_loss, train_stats = train(model, train_iterator, optim, clip)
        valid_loss, valid_stats = evaluate(model, valid_iterator)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), os.path.join(save_path, model_name))

        logger.info("-------------------------")
        logger.info(
            f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
        logger.info(f'\tTrain Loss: {train_loss:.3f}')
        logger.info(f'\t Val. Loss: {valid_loss:.3f}')
        logger.info(
            f'\t Train f1: {train_stats[0]} \n Valid f1: {valid_stats[0]}')
        logger.info(
            f'\t Train action accuracy: {train_stats[1]:.3f} \t Valid action accuracy: {valid_stats[1]:.3f}'
        )
        logger.info(
            f'\t Train object accuracy: {train_stats[2]:.3f} \t Valid object accuracy: {valid_stats[2]:.3f}'
        )
        logger.info(
            f'\t Train location accuracy: {train_stats[3]:.3f} \t Valid location accuracy: {valid_stats[3]:.3f}'
        )

        add_to_writer(writer, epoch, train_loss, valid_loss, train_stats,
                      valid_stats, config)

    # dumping config file
    with open(config['log_path'] + "/config.yaml", "w") as file:
        _ = yaml.dump(config, file)
Example #10
    def train(self):
        """Train the model on the training dataset."""
        print(f'The model has {self.model.count_params():,} parameters')
        best_valid_loss = float('inf')

        for epoch in range(self.params.num_epoch):
            self.model.train()
            train_loss = 0
            start_time = time.time()

            for input_ids in self.train_iter:
                self.optimizer.zero_grad()
                input_ids = input_ids.to(self.params.device)
                output = self.model(input_ids[:, :-1])
 
                preds = output.contiguous().view(-1, output.size(-1))
                # preds = [(batch size * sentence length), vocab size]
                golds = input_ids[:, 1:].contiguous().view(-1)
                # golds = [(batch size * sentence length)]
                
                loss = self.criterion(preds, golds)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.params.clip)
                self.optimizer.step()
                
                train_loss += loss.item()

            train_loss = train_loss / len(self.train_iter)
            valid_loss = self.validate()

            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(self.model.state_dict(), self.params.save_dir)

            print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')
Example #11
        # history for lr_finder
        history_lr_finder = {"lr": [], "loss": []}
        best_loss_lr_finder = None
        lr_scheduler = ExponentialLR(optimizer, end_lr=100, num_iter=100)

        N_EPOCHS = 150
        CLIP = 1
        best_valid_loss = float('inf')

        for epoch in range(N_EPOCHS):
            start_time = time.time()
            train_loss = train(model, train_iterator, optimizer,
                               criterion, CLIP, lr_scheduler)
            valid_loss = evaluate(model, valid_iterator, criterion)
            end_time = time.time()
            m, s = epoch_time(start_time, end_time)

            history_lr_finder["lr"].append(lr_scheduler.get_lr()[0])
            lr_scheduler.step()
            if epoch == 0:
                best_loss_lr_finder = valid_loss
            else:
                smooth_f = 0.05
                valid_loss = (smooth_f*valid_loss +
                              (1-smooth_f)*history_lr_finder["loss"][-1])
                if valid_loss < best_loss_lr_finder:
                    best_loss_lr_finder = valid_loss

            history_lr_finder["loss"].append(valid_loss)
            if valid_loss > 5 * best_loss_lr_finder:
                break
Example #12
def main(args):

    # use cuda if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    SRC = torch.load(os.path.join(args.data_path, "src_vocab.pt"))
    TRG = torch.load(os.path.join(args.data_path, "trg_vocab.pt"))

    # gather parameters from the vocabulary
    input_dim = len(SRC.vocab)
    output_dim = len(TRG.vocab)
    pad_idx = SRC.vocab.stoi[SRC.pad_token]

    model_dict = torch.load(args.pretrained_model)
    dropout = model_dict["dropout"]

    prev_state_dict = model_dict["model_state_dict"]
    del model_dict

    # gather parameters except dec_hid_dim since in this model they are the same
    prev_param_dict = get_prev_params(prev_state_dict)

    # create model
    model = Seq2Seq(
        input_dim,
        prev_param_dict["emb_dim"],
        prev_param_dict["enc_hid_dim"],
        output_dim,
        prev_param_dict["enc_layers"],
        dropout,
        prev_param_dict["bidirectional"],
        pad_idx,
        device,
    ).to(device)

    model.load_state_dict(prev_state_dict)
    del prev_state_dict
    print(model)

    test_path = os.path.join(args.data_path, "test.tsv")
    test_set = LazyDataset(test_path, SRC, TRG, "evaluation")

    test_batch_sampler = BucketBatchSampler(test_path, args.batch_size)

    # build dictionary of parameters for the Dataloader
    test_loader_params = {
        # since bucket sampler returns batch, batch_size is 1
        "batch_size": 1,
        # sort_batch reverse sorts for pack_pad_seq
        "collate_fn": sort_batch,
        "batch_sampler": test_batch_sampler,
        "num_workers": args.num_workers,
        "shuffle": False,
        "pin_memory": True,
        "drop_last": False,
    }

    test_iterator = torch.utils.data.DataLoader(test_set, **test_loader_params)

    start_time = time.time()

    final_preds = []
    final_targets = []
    for i, batch in enumerate(test_iterator):
        source, target_indicies, src_len = prep_eval_batch(
            batch, device, TRG.vocab.stoi[TRG.pad_token])
        # get targets from file
        final_targets += [
            get_target(test_path, idx) for idx in target_indicies
        ]

        if args.decode_method == "beam":
            final_preds += preds_to_toks(
                beam_decode(source, src_len, TRG, model, device), TRG)

        elif args.decode_method == "greedy":
            preds = greedy_decode(source, src_len, TRG, model, device)
            # tensor to integer numpy array for quicker processing
            preds = preds.numpy().astype(int)
            final_preds += preds_to_toks(preds, TRG)

        # report timing roughly every 1% of batches (guard against very small test sets)
        if i % max(1, len(test_iterator) // 100) == 0:
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            print(f" batch {i} |Time: {epoch_mins}m {epoch_secs}s")
            start_time = end_time

    if args.save_file:
        # use a context manager so the output file is flushed and closed
        with open(args.save_file, "w") as sink:
            writer = csv.writer(sink, delimiter="\t")
            writer.writerows(zip(final_preds, final_targets))

    if not args.no_bleu:
        final_preds = [p.split() for p in final_preds]
        final_targets = [[t.split()] for t in final_targets]
        print(bleu_score(final_preds, final_targets))
Example #13
def analyse_sentiments(params=None, model_name='', training_mode=True):
    """Train and evaluate an RNN sentiment classifier.

    :param params: dictionary of preprocessing and RNN hyperparameters
    :param model_name: base name for the saved model checkpoint file
    :param training_mode: if True, train before evaluating; otherwise only load and evaluate
    :return: test loss and test accuracy
    """

    vector_name = params['pretrained_vectors']
    MAX_VOCAB_SIZE = params['MAX_VOCAB_SIZE']
    min_freq = params['min_freq']
    EMBEDDING_DIM = params['embedding_dim']

    FREEZE_EMDEDDINGS = params['RNN_FREEZE_EMDEDDINGS']
    HIDDEN_DIM = params['RNN_HIDDEN_DIM']  # model_params['RNN_HIDDEN_DIM']
    OUTPUT_DIM = 1  # params['OUTPUT_DIM']
    N_LAYERS = params['RNN_N_LAYERS']  # model_params['RNN_N_LAYERS']
    DROPOUT = params['RNN_DROPOUT']  # model_params['RNN_DROPOUT']
    USE_GRU = params['RNN_USE_GRU']  # model_params['RNN_USE_GRU']
    N_EPOCHS = params['RNN_EPOCHS']
    BATCH_SIZE = params['RNN_BATCH_SIZE']

    pretrained = True
    if vector_name is None:
        pretrained = False

    TEXT = torchtext.data.Field(lower=True,
                                pad_first=True,
                                batch_first=True,
                                init_token='<sos>',
                                eos_token='<eos>'
                                # include_lengths=True
                                )

    LABEL = torchtext.data.LabelField(dtype=torch.float)
    datafields = [('Sentiment', LABEL), ('SentimentText', TEXT)]
    train_set, val_set, test_set = TabularDataset.splits(
        path='../data/',
        train='processed_train.csv',
        validation='processed_val.csv',
        test='processed_test.csv',
        format='csv',
        skip_header=True,
        fields=datafields)

    if pretrained:
        vectors = load_vectors(fname=vector_name)
        TEXT.build_vocab(train_set,
                         vectors=vectors,
                         unk_init=torch.Tensor.normal_)
        vectors = TEXT.vocab.vectors
        # print(vectors.shape)
        EMBEDDING_DIM = vectors.shape[1]
    else:
        TEXT.build_vocab(train_set, max_size=MAX_VOCAB_SIZE)
    LABEL.build_vocab(train_set)
    print(f"Most frequent words in vocab. {TEXT.vocab.freqs.most_common(20)}")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Device used is {device}")
    # minimise padding within each batch
    train_iterator, val_iterator, test_iterator = torchtext.data.BucketIterator.splits(
        (train_set, val_set, test_set),
        batch_size=BATCH_SIZE,
        sort_key=lambda x: len(x.SentimentText),
        sort_within_batch=False,
        device=device)

    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    INPUT_DIM = len(TEXT.vocab)
    print(f"Vocab size is {INPUT_DIM}, embedding dim is {EMBEDDING_DIM}")
    model = RNNModel(vocab_size=INPUT_DIM,
                     embedding_dim=EMBEDDING_DIM,
                     hidden_dim=HIDDEN_DIM,
                     output_dim=OUTPUT_DIM,
                     n_layers=N_LAYERS,
                     bidirectional=True,
                     dropout=DROPOUT,
                     pad_idx=pad_idx,
                     use_gru=USE_GRU)
    print(model)

    if pretrained:
        model.embedding.weight.data.copy_(vectors)

    unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
    init_idx = TEXT.vocab.stoi[TEXT.init_token]
    eos_idx = TEXT.vocab.stoi[TEXT.eos_token]
    print(
        f"pad_idx {pad_idx}, unk_idx {unk_idx}, init_idx {init_idx}, eos_idx {eos_idx}"
    )
    model.embedding.weight.data[unk_idx] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[pad_idx] = torch.zeros(EMBEDDING_DIM)

    # freeze embeddings
    if FREEZE_EMDEDDINGS:
        model.embedding.weight.requires_grad = False
    else:
        model.embedding.weight.requires_grad = True

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.BCEWithLogitsLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    if training_mode:
        best_valid_loss = float('inf')
        for epoch in range(N_EPOCHS):
            start_time = time.time()
            model, train_loss, train_acc = train_epoch(model, train_iterator,
                                                       optimizer, criterion,
                                                       device)
            valid_loss, valid_acc = evaluate(model, val_iterator, criterion)
            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), f"{model_name}.pt")

            print(
                f'Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc * 100:.2f}%'
            )

    # TODO DO TESTS AND PLOT RESULT
    # Evaluate model performance
    model.load_state_dict(torch.load(f"{model_name}.pt"))
    # print(model)

    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')

    confusion_matrix(model, test_iterator, device=device, fname=model_name)

    sentence = "got a whole new wave of depression when i saw it was my rafa's losing match  I HATE YOU SODERLING"
    value = evaluate_sentences(model, sentence, TEXT, device)
    print(f"'{sentence}' sentiment is {value}")

    sentence = "STOKED for the show tomorrow night! 2 great shows combined."
    value = evaluate_sentences(model, sentence, TEXT, device)
    print(f"'{sentence}' sentiment is {value}")
    return test_loss, test_acc
Example #14
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # device = torch.device('cpu')
    if torch.cuda.is_available():
        print("current device: ", torch.cuda.current_device())

    # special token
    SOPH = '<soph>'
    NSOPH = '<nsoph>'

    config = BertConfig.from_pretrained('bert-base-uncased')

    # constant the seed
    SEED = 1234

    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    num_added_token = tokenizer.add_tokens([SOPH, NSOPH])

    INPUT_DIM = len(tokenizer)  # len(SRC.vocab)
    OUTPUT_DIM = len(tokenizer)  # len(TRG.vocab)
    HID_DIM = 768
    DEC_LAYERS = 3
    DEC_HEADS = 8
    DEC_PF_DIM = 512
    ENC_DROPOUT = 0.1
    DEC_DROPOUT = 0.1
    SRC_PAD_IDX = 0
    TRG_PAD_IDX = 0
    BATCH_SIZE = 100
    MAX_SEQ_LEN = 50
    N_EPOCHS = 5
    CLIP = 1
    LEARNING_RATE = 0.0005
    SAVE_PATH = 'tut6-model.pt'
    LOAD_PATH = 'tut6-model.pt'

    unfreeze_bert = False
    do_load = False

    do_train = False
    do_eval = False
    do_generate = True

    dec = Decoder(OUTPUT_DIM,
                  HID_DIM,
                  DEC_LAYERS,
                  DEC_HEADS,
                  DEC_PF_DIM,
                  DEC_DROPOUT,
                  device)

    model = Seq2Seq(dec, SRC_PAD_IDX, TRG_PAD_IDX, config, device).to(device)

    # Resize tokenizer
    model.bert_encoder.resize_token_embeddings(len(tokenizer))

    model.decoder.apply(initialize_weights)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    best_valid_loss = float('inf')

    processor = DiscoFuseProcessor()

    valid_iterator, num_val_ex = make_DataLoader(data_dir='./',
                                                 processor=processor,
                                                 tokenizer=tokenizer,
                                                 max_seq_length=MAX_SEQ_LEN,
                                                 batch_size=BATCH_SIZE,
                                                 mode="dev",
                                                 SOPH=SOPH,
                                                 NSOPH=NSOPH,
                                                 domain="sports")

    if do_train:
        for param in model.bert_encoder.parameters():
            param.requires_grad = unfreeze_bert

        print(f'The model has {count_parameters(model):,} trainable parameters')

        train_iterator, num_tr_ex = make_DataLoader(data_dir='./',
                                                    processor=processor,
                                                    tokenizer=tokenizer,
                                                    max_seq_length=MAX_SEQ_LEN,
                                                    batch_size=BATCH_SIZE,
                                                    mode="train",
                                                    SOPH=SOPH,
                                                    NSOPH=NSOPH)

        print("---- Begin Training ----")
        if do_load and os.path.exists(LOAD_PATH):
            print("---- Loading model from {} ----".format(LOAD_PATH))
            model.load_state_dict(torch.load(LOAD_PATH))

        for epoch in range(N_EPOCHS):

            start_time = time.time()

            num_batches_in_epoch = int(num_tr_ex/BATCH_SIZE)  # 10000

            train_loss = train(model, train_iterator, optimizer, criterion, CLIP,  num_batches_in_epoch, device=device)
            valid_loss, valid_exact = evaluate(model, valid_iterator, criterion, device=device, tokenizer=tokenizer)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), SAVE_PATH)

            print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
            print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
            print(f'\t Val. EXACT: {valid_exact:.2f}')

    elif do_eval:
        print("Doing only evaluation")
        model.load_state_dict(torch.load(LOAD_PATH))
        valid_loss, valid_exact = evaluate(model, valid_iterator, criterion, device=device, tokenizer=tokenizer)
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. EXACT: {valid_exact:3.3f}')

    elif do_generate:
        print("Doing only generation")
        model.load_state_dict(torch.load(LOAD_PATH))
        all_predictions, all_trgs, all_counter_predictions = generate(model, valid_iterator, device, tokenizer)
        all_counter_pred_str = [" ".join(a).replace(" ##", "") for a in all_counter_predictions]
        all_pred_str = [" ".join(a).replace(" ##", "") for a in all_predictions]
        all_trgs_str = [" ".join(a).replace(" ##", "") for a in all_trgs]
        with open("generated_fuse.txt", 'a') as fp:
            for i in range(len(all_predictions)):
                counter_pred_line = "Counter pred: " + all_counter_pred_str[i] + "\n"
                pred_line = "Origin pred:  " + all_pred_str[i] + "\n"
                trg_line = "origin trg:   " + all_trgs_str[i] + "\n\n"
                fp.writelines(counter_pred_line)
                fp.writelines(pred_line)
                fp.writelines(trg_line)

    else:
        raise ValueError("Error - must either train, evaluate, or generate!")
Example #15
#-------------------------train & valid--------------------------------

model_init(model)

optimizer = NoamOpt(HID_DIM,
                    factor=1,
                    warmup=2000,
                    optimizer=optim.Adam(model.parameters(),
                                         lr=0,
                                         betas=(0.9, 0.98),
                                         eps=1e-9))
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

for epoch in range(N_EPOCH):

    start_time = time.time()

    train_loss, train_score = train(model, train_iterator, optimizer,
                                    criterion, CLIP, cor)
    valid_loss, valid_score = evaluate(model, valid_iterator, criterion, cor)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time=start_time, end_time=end_time)

    print("Epoch No.", epoch, " Time: ", epoch_mins, "min", epoch_secs, "sec")
    print(" Train Loss: ", train_loss, " Train PPL: ", math.exp(train_loss),
          " Train_BLEU: ", train_score)
    print(" Valid Loss: ", valid_loss, " Valid PPL: ", math.exp(valid_loss),
          " Valid_BLEU: ", valid_score)
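NoamOpt above wraps Adam in a warmup learning-rate schedule. Assuming it follows the usual "Noam" schedule from the Transformer paper (an assumption about this project's implementation, not confirmed by the snippet), the rate at optimization step step (counted from 1) would be computed roughly as:

def noam_rate(step, model_size, factor, warmup):
    # lr = factor * model_size^-0.5 * min(step^-0.5, step * warmup^-1.5)
    return factor * (model_size ** -0.5) * min(step ** -0.5, step * warmup ** -1.5)

With factor=1, warmup=2000, and model_size=HID_DIM as in the example, the rate rises linearly for the first 2000 steps and then decays with the inverse square root of the step count.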
Example #16
def train_model(
    model,
    iterator,
    task,
    optimizer,
    criterion,
    clip,
    device,
    epoch,
    start_time,
    save_path,
    dropout,
    pad_indices,
    num_batches,
    teacher_forcing=None,
    checkpoint=None,
    repr_layer=None,
):

    model.train()
    epoch_loss = 0
    batch_loss = []
    if task == "tagging":
        # save 10 times throughout training
        save_loss = np.linspace(0, num_batches, num=10, dtype=int)
    elif task == "translation":
        # save 100 times throughout training
        save_loss = np.linspace(0, num_batches, num=100, dtype=int)

    try:
        for i, batch in enumerate(iterator):
            source, targets, src_len = prep_batch(batch, device, pad_indices)
            optimizer.zero_grad()
            loss = train_step(
                model,
                source,
                src_len,
                targets,
                task,
                criterion,
                optimizer,
                clip,
                teacher_forcing,
            )

            epoch_loss += loss
            if i in save_loss:
                batch_loss.append(loss)
                end_time = time.time()

                batch_mins, batch_secs = epoch_time(start_time, end_time)

                print(
                    f"epoch {epoch} batch: {i} | Train loss: {loss:.3f} | Time: {batch_mins}m {batch_secs}s"
                )
                start_time = end_time

            # optionally checkpoint
            if checkpoint is not None:
                if i % checkpoint == 0:
                    adam, sparse_adam = optimizer.return_optimizers()
                    torch.save(
                        {
                            "epoch": epoch,
                            "model_state_dict": model.state_dict(),
                            "adam_state_dict": adam.state_dict(),
                            "sparse_adam_state_dict": sparse_adam.state_dict(),
                            "loss": loss,
                            "dropout": dropout,
                            "repr_layer": repr_layer,
                        },
                        os.path.join(save_path, f"checkpoint_{epoch}_{i}.pt"),
                    )
                    print(
                        f"Checkpoint saved at epoch {epoch} batch {i}. Train loss is {loss:.3f}"
                    )
    # abort the epoch early if the GPU runs out of memory; re-raise anything else
    except RuntimeError as e:
        if "out of memory" in str(e):
            print(f"| WARNING: ran out of memory, stopping epoch at batch number {i:,}")
        else:
            raise

    return epoch_loss / num_batches, batch_loss
Example #17
    result = dict()

    # training and evaluate
    for tokenizer_name, tokenizer in zip(tokenizer_names, tokenizers):
        print(f'-------------------------------------------------------------')
        print(f'Data loading with {tokenizer_name} tokenizer...')
        start_time = time.time()
        TEXT, LABEL, train_iterator, test_iterator = dataloader(tokenizer,
                                                                args.max_vocab_size,
                                                                args.batch_size, device)
        input_dim = len(TEXT.vocab)
        print(f'The number of vocabularies is {input_dim}.')

        end_time = time.time()
        data_loading_time = round(end_time - start_time,3)
        data_prep_mins, data_prep_secs = epoch_time(start_time, end_time)
        print(f'Data loading Time: {data_prep_mins}m {data_prep_secs}s')


        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        model = RNN(input_dim, args.embedding_dim,
                    args.hidden_dim, 1, args.n_layers, 
                    args.bidirectional, args.dropout, pad_idx)
        model.embedding.weight.data[pad_idx] = torch.zeros(args.embedding_dim)

        optimizer = optim.Adam(model.parameters())
        criterion = nn.BCEWithLogitsLoss()

        model = model.to(device)
        criterion = criterion.to(device)
Example #18
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Preprocessing
    pp_start_time = time.time()

    trainloader, testloader = get_dataloaders(args)

    pp_end_time = time.time()
    pp_mins, pp_secs = epoch_time(pp_start_time, pp_end_time)
    print(f'Preprocessing time: {pp_mins}m {pp_secs}s')

    with wandb.init(project='RegulQuant', entity='womeiyouleezi', config=args):
        if args.run_name:
            wandb.run.name = args.run_name
        if (not args.save_file):
            file_name = wandb.run.name
        else:
            file_name = args.save_file

        # make model
        net = get_model(args).to(device)
        #net = ConvNet().to(device)

        # unpack args
Example #19
def run():
    Seed = 1234
    random.seed(Seed)
    np.random.seed(Seed)
    torch.manual_seed(Seed)
    torch.cuda.manual_seed(Seed)
    torch.backends.cudnn.deterministic = True
    train, valid, test, SRC, TRG = dataset.create_dataset()
    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train, valid, test),
        sort_key=lambda x: len(x.source),
        batch_size=config.BATCH_SIZE,
        device=config.device)

    INPUT_DIM = len(SRC.vocab)
    OUTPUT_DIM = len(TRG.vocab)

    ENC_EMB_DIM = config.ENCODER_EMBEDDING_DIMENSION
    DEC_EMB_DIM = config.DECODER_EMBEDDING_DIMENSION
    HID_DIM = config.LSTM_HIDDEN_DIMENSION
    N_LAYERS = config.LSTM_LAYERS
    ENC_DROPOUT = config.ENCODER_DROPOUT
    DEC_DROPOUT = config.DECODER_DROPOUT

    attn = model.Attention(HID_DIM, HID_DIM)
    enc = model.Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, HID_DIM, ENC_DROPOUT)
    dec = model.Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, HID_DIM, DEC_DROPOUT,
                        attn)

    model_rnn = model.Seq2Seq(enc, dec, config.device).to(config.device)

    optimizer = optim.Adam(model_rnn.parameters(), lr=config.LEARNING_RATE)

    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    if (args.action == 'train'):
        model_rnn.apply(utils.init_weights)

        best_valid_loss = float('inf')

        for epoch in range(config.N_EPOCHS):
            start_time = time.time()

            train_loss = engine.train_fn(model_rnn, train_iterator, optimizer,
                                         criterion, config.CLIP)
            valid_loss = engine.evaluate_fn(model_rnn, valid_iterator,
                                            criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model_rnn.state_dict(), config.MODEL_SAVE_FILE)

            with open(config.RESULTS_SAVE_FILE, 'a') as f:
                print(
                    f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s',
                    file=f)
                print(
                    f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}',
                    file=f)
                print(
                    f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}',
                    file=f)

    elif (args.action == 'test'):
        model_rnn.load_state_dict(torch.load(config.TEST_MODEL))
        loss, target, output = engine.test_fn(model_rnn, test_iterator,
                                              criterion, SRC, TRG)
        bl = bleu_score(output, target, max_n=1, weights=[1])
        met = 0

        for z in range(len(output)):
            out = ' '.join(output[z][y]
                           for y in range(min(10, len(output[z]))))
            tar = ' '.join(y for y in target[z])

            met = met + metric_utils.compute_metric(out, 1.0, tar)

        with open(config.TEST_RESULTS_FILE, 'w') as f:
            print(f'Test BLEU: {bl*100}, Test PPL: {math.exp(loss):7.3f}',
                  'Metric:',
                  met / len(output),
                  file=f)

    elif (args.action == 'save_vocab'):
        print('Source Vocab Length', len(SRC.vocab))
        print('Target vocab length', len(TRG.vocab))
        s1 = '\n'.join(k for k in SRC.vocab.itos)
        s2 = '\n'.join(k for k in TRG.vocab.itos)
        with open('NL_vocabulary.txt', 'w') as f:
            f.write(s1)
        with open('Bash_vocabulary.txt', 'w') as f:
            f.write(s2)
Example #20
def main(args):

    # use cuda if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create directory for saving models if it doesn't already exist
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)

    SRC = torch.load(os.path.join(args.nmt_data_path, "src_vocab.pt"))
    TRG = torch.load(os.path.join(args.data_path, "trg_vocab.pt"))

    # gather parameters from the vocabulary
    input_dim = len(SRC.vocab)
    output_dim = len(TRG.vocab)
    pad_idx = SRC.vocab.stoi[SRC.pad_token]

    # create lazydataset and data loader
    train_path = os.path.join(args.data_path, "train.tsv")
    training_set = LazyDataset(train_path, SRC, TRG, "tagging")

    train_batch_sampler = BucketBatchSampler(train_path, args.batch_size)
    # number of batches comes from the sampler, not the iterator
    num_batches = train_batch_sampler.num_batches

    # build dictionary of parameters for the Dataloader
    train_loader_params = {
        # since bucket sampler returns batch, batch_size is 1
        "batch_size": 1,
        # sort_batch reverse sorts for pack_pad_seq
        "collate_fn": sort_batch,
        "batch_sampler": train_batch_sampler,
        "num_workers": args.num_workers,
        "shuffle": args.shuffle,
        "pin_memory": True,
        "drop_last": False,
    }

    train_iterator = torch.utils.data.DataLoader(training_set,
                                                 **train_loader_params)

    # load pretrained-model
    prev_state_dict = torch.load(args.pretrained_model,
                                 map_location=torch.device("cpu"))
    enc_dropout = prev_state_dict["dropout"]
    prev_state_dict = prev_state_dict["model_state_dict"]

    # gather parameters except dec_hid_dim since tagger gets this from args
    prev_param_dict = get_prev_params(prev_state_dict)

    new_state_dict = make_encoder_dict(prev_state_dict)

    if args.repr_layer == "embedding":
        new_dict = {}
        # add embedding layer
        new_dict["enc_embedding.weight"] = new_state_dict[
            "enc_embedding.weight"]
        # replace state dict with new dict
        new_state_dict = new_dict
    elif args.repr_layer == "rnn1":
        new_dict = {}
        # add embedding layer
        new_dict["enc_embedding.weight"] = new_state_dict[
            "enc_embedding.weight"]
        # add first layer weights and bias
        for k, v in new_state_dict.items():
            if "l0" in k:
                new_dict[k] = v
        # replace state dict with new dict
        new_state_dict = new_dict

    model = Tagger(
        new_state_dict=new_state_dict,
        input_dim=input_dim,
        emb_dim=prev_param_dict["emb_dim"],
        enc_hid_dim=prev_param_dict["enc_hid_dim"],
        dec_hid_dim=args.hid_dim,
        output_dim=output_dim,
        enc_layers=prev_param_dict["enc_layers"],
        dec_layers=args.n_layers,
        enc_dropout=enc_dropout,
        dec_dropout=args.dropout,
        bidirectional=prev_param_dict["bidirectional"],
        pad_idx=pad_idx,
        repr_layer=args.repr_layer,
    ).to(device)

    # optionally randomly initialize weights
    if args.random_init:
        model.apply(random_init_weights)

    print(model)
    print(f"The model has {count_parameters(model):,} trainable parameters")

    optimizer = make_muliti_optim(model.named_parameters(), args.learning_rate)

    if not args.unfreeze_encoder:
        for param in model.encoder.parameters():
            param.requires_grad = False

    SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
    TRG_PAD_IDX = len(TRG.vocab) + 1
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    best_valid_loss = float("inf")

    # training
    batch_history = []
    epoch_history = []
    for epoch in range(1, args.epochs + 1):
        start_time = time.time()
        train_loss, batch_loss = train_model(
            model=model,
            iterator=train_iterator,
            task="tagging",
            optimizer=optimizer,
            criterion=criterion,
            clip=args.clip,
            device=device,
            epoch=epoch,
            start_time=start_time,
            save_path=args.save_path,
            pad_indices=(SRC_PAD_IDX, TRG_PAD_IDX),
            dropout=(enc_dropout, args.dropout),
            checkpoint=args.checkpoint,
            repr_layer=args.repr_layer,
            num_batches=num_batches,
        )
        batch_history += batch_loss
        epoch_history.append(train_loss)
        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        model_filename = os.path.join(args.save_path,
                                      f"model_epoch_{epoch}.pt")
        adam, sparse_adam = optimizer.return_optimizers()
        if not args.only_best:
            torch.save(
                {
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "adam_state_dict": adam.state_dict(),
                    "sparse_adam_state_dict": sparse_adam.state_dict(),
                    # validation has not run yet at this point, so record the train loss
                    "loss": train_loss,
                    "dropout": (enc_dropout, args.dropout),
                    "repr_layer": args.repr_layer,
                },
                model_filename,
            )

        # optionally validate
        if not args.skip_validate:
            valid_path = os.path.join(args.data_path, "valid.tsv")
            valid_set = LazyDataset(valid_path, SRC, TRG, "tagging")
            valid_batch_sampler = BucketBatchSampler(valid_path,
                                                     args.batch_size)
            # number of batches comes from the sampler, not the iterator
            valid_num_batches = valid_batch_sampler.num_batches
            valid_loader_params = {
                # since bucket sampler returns batch, batch_size is 1
                "batch_size": 1,
                # sort_batch reverse sorts for pack_pad_seq
                "collate_fn": sort_batch,
                "batch_sampler": valid_batch_sampler,
                "num_workers": args.num_workers,
                "shuffle": args.shuffle,
                "pin_memory": True,
                "drop_last": False,
            }

            valid_iterator = torch.utils.data.DataLoader(
                valid_set, **valid_loader_params)

            valid_loss = evaluate_model(
                model,
                valid_iterator,
                num_batches=valid_num_batches,
                optimizer=optimizer,
                criterion=criterion,
                task="tagging",
                device=device,
                pad_indices=(SRC_PAD_IDX, TRG_PAD_IDX),
            )

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss

                best_filename = os.path.join(args.save_path, f"best_model.pt")
                torch.save(
                    {
                        "epoch": epoch,
                        "model_state_dict": model.state_dict(),
                        "adam_state_dict": adam.state_dict(),
                        "sparse_adam_state_dict": sparse_adam.state_dict(),
                        "loss": valid_loss,
                        "dropout": (enc_dropout, args.dropout),
                        "repr_layer": args.repr_layer,
                    },
                    best_filename,
                )

            print(f"Epoch: {epoch:02} | Time: {epoch_mins}m {epoch_secs}s")
            print(
                f"\t Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}"
            )
            print(
                f"\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}"
            )

        else:
            print(f"Epoch: {epoch:02} | Time: {epoch_mins}m {epoch_secs}s")
            print(
                f"\t Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}"
            )

    if args.loss_plot:
        make_loss_plot(batch_history, args.save_path, args.epochs)
Example #21
model.apply(initialize_weights)

optimizer = optim.Adam(model.parameters(), lr=lr)
target_pad_idx = en_field.vocab.stoi[en_field.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=target_pad_idx)

best_val_loss = float('inf')
writer = SummaryWriter(log_dir)
for epoch in range(num_epochs):
    s = time.time()
    train_loss = train(model, train_loader, optimizer, criterion, clip=1)
    val_loss = evaluate(model, val_loader, criterion)

    t = time.time()

    epoch_min, epoch_sec = epoch_time(s, t)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), os.path.join(ckpt_dir, model_name))

    print("Epoch : %02d | Elapsed Time : %02d min %02d sec" %
          (epoch + 1, epoch_min, epoch_sec))
    print("\t Train Loss : %.3f | Train PPL : %7.3f" %
          (train_loss, math.exp(train_loss)))
    print("\t Val   Loss : %.3f | Val   PPL : %7.3f" %
          (val_loss, math.exp(val_loss)))
    writer.add_scalars(f"{model_name}/Train and Val Loss", {
        "Train_Loss": train_loss,
        "Val_Loss": val_loss
    }, epoch + 1)
Example #22
def main():
    g_tokenizer = Tokenizer("de", rev=True)
    e_tokenizer = Tokenizer("en")

    SRC = Field(
        tokenize=g_tokenizer,
        init_token='<sos>',
        eos_token='<eos>',
        lower=True
    )

    TRG = Field(
        tokenize=e_tokenizer,
        init_token='<sos>',
        eos_token='<eos>',
        lower=True
    )

    train_data, valid_data, test_data = Multi30k.splits(
        exts=('.de', '.en'),
        fields=(SRC, TRG)
    )

    print(f'train: {len(train_data.examples)}')
    print(f'valid: {len(valid_data.examples)}')
    print(f'test : {len(test_data.examples)}')

    SRC.build_vocab(train_data, min_freq=2)
    TRG.build_vocab(train_data, min_freq=2)

    print(f'SRC vocab: {len(SRC.vocab)}')
    print(f'TRG vocab: {len(TRG.vocab)}')

    train_iter, valid_iter, test_iter = BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_sizes=(BATCH_SIZE, BATCH_SIZE, BATCH_SIZE),
        device=torch.device('cuda')
    )

    encoder = Encoder(len(SRC.vocab), 256, 512, 2, 0.5)
    decoder = Decoder(len(TRG.vocab), 256, 512, 2, 0.5)

    model = Seq2Seq(encoder, decoder, torch.device('cuda')).cuda()
    model.apply(init_weights)

    print(count_parameters(model))

    optimizer = optim.Adam(model.parameters())
    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    n_epochs = 10
    clip = 1

    best_valid_loss = float('inf')

    for epoch in range(n_epochs):
        start_time = time.time()

        train_loss = train(model, train_iter, optimizer, criterion, clip)
        valid_loss = evaluate(model, valid_iter, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), 'best_model.pth')
        
        print(f'epoch {epoch}| time: {epoch_mins}m {epoch_secs}s')
        print(f'train loss: {train_loss} | train ppl: {math.exp(train_loss)}')
        print(f'valid loss: {valid_loss} | valid ppl: {math.exp(valid_loss)}')
        print()

    model.load_state_dict(torch.load('best_model.pth'))
    test_loss = evaluate(model, test_iter, criterion)

    print(f'test loss: {test_loss} | test ppl: {math.exp(test_loss)}')
    print()
    print('DONE')
Example #23
def main_classification(args):
    print('Get data and model')
    ma_iterators, reiss_iterators, text, vec = data.get_cl_data(args)
    maslow_train_it, maslow_valid_it, maslow_test_it, maslow_label = ma_iterators
    reiss_train_it, reiss_valid_it, reiss_test_it, reiss_label = reiss_iterators

    # Number of labels per task:
    classes = [len(maslow_label.vocab), len(reiss_label.vocab)]
    if args.model == 'seq2seq':
        model = utils.create_seq2seq_model_cl(args, text, text, vec,
                                              maslow_label, reiss_label)
    elif args.model == 'gpt2':
        model = models.GPT2Classifier(classes, args.gpttokenizer).to(device)

    best_valid_loss = float('inf')
    best_valid_epoch = 0
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    print('Training starting...')
    # Main loop
    for epoch in range(args.num_epochs):
        start_time = time.time()

        # Training
        train_loss, ma_tr_pred, ma_tr_true, re_tr_pred, re_tr_true = \
                train_cl(args, model, maslow_train_it, reiss_train_it,
                        optimizer, criterion, args.grad_clip)

        # Validation
        valid_loss, ma_v_pred, ma_v_true, re_v_pred, re_v_true = \
                evaluate_cl(model, maslow_valid_it, reiss_valid_it, criterion)
        # Test
        test_loss, ma_t_pred, ma_t_true, re_t_pred, re_t_true = \
                evaluate_cl(model, maslow_test_it, reiss_test_it, criterion)

        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_valid_epoch = epoch

        # Maslow
        tr_acc = accuracy_score(ma_tr_true, ma_tr_pred)
        v_acc = accuracy_score(ma_v_true, ma_v_pred)
        v_f1 = f1_score(ma_v_true, ma_v_pred, average='macro')
        v_p = precision_score(ma_v_true, ma_v_pred, average='macro')
        v_r = recall_score(ma_v_true, ma_v_pred, average='macro')
        t_acc = accuracy_score(ma_t_true, ma_t_pred)
        t_f1 = f1_score(ma_t_true, ma_t_pred, average='macro')
        t_p = precision_score(ma_t_true, ma_t_pred, average='macro')
        t_r = recall_score(ma_t_true, ma_t_pred, average='macro')

        print('Maslow')
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.4f} | acc: {tr_acc:7.4f}')
        print(
            f'\t Val. Loss: {valid_loss:.4f} | acc: {v_acc:7.4f} | f1: {v_f1:7.4f} | prec: {v_p:7.4f} | rec: {v_r:7.4f}'
        )
        print(
            f'\t Test Loss: {test_loss:.4f} | acc: {t_acc:7.4f} | f1: {t_f1:7.4f} | prec: {t_p:7.4f} | rec: {t_r:7.4f}'
        )

        # Reiss
        tr_acc = accuracy_score(re_tr_true, re_tr_pred)
        v_acc = accuracy_score(re_v_true, re_v_pred)
        v_f1 = f1_score(re_v_true, re_v_pred, average='macro')
        v_p = precision_score(re_v_true, re_v_pred, average='macro')
        v_r = recall_score(re_v_true, re_v_pred, average='macro')
        t_acc = accuracy_score(re_t_true, re_t_pred)
        t_f1 = f1_score(re_t_true, re_t_pred, average='macro')
        t_p = precision_score(re_t_true, re_t_pred, average='macro')
        t_r = recall_score(re_t_true, re_t_pred, average='macro')

        print('Reiss')
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.4f} | acc: {tr_acc:7.4f}')
        print(
            f'\t Val. Loss: {valid_loss:.4f} | acc: {v_acc:7.4f} | f1: {v_f1:7.4f} | prec: {v_p:7.4f} | rec: {v_r:7.4f}'
        )
        print(
            f'\t Test Loss: {test_loss:.4f} | acc: {t_acc:7.4f} | f1: {t_f1:7.4f} | prec: {t_p:7.4f} | rec: {t_r:7.4f}'
        )
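The Maslow and Reiss reporting blocks above are identical except for the prediction and label arrays they read. A small hypothetical helper (report_task is not part of the original code) would remove that duplication:

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def report_task(name, tr_true, tr_pred, v_true, v_pred, t_true, t_pred):
    # Print train accuracy plus macro-averaged validation and test metrics for one task.
    print(name)
    print(f'\tTrain acc: {accuracy_score(tr_true, tr_pred):7.4f}')
    for split, true, pred in (('Val. ', v_true, v_pred), ('Test ', t_true, t_pred)):
        print(f'\t{split}acc: {accuracy_score(true, pred):7.4f} | '
              f'f1: {f1_score(true, pred, average="macro"):7.4f} | '
              f'prec: {precision_score(true, pred, average="macro"):7.4f} | '
              f'rec: {recall_score(true, pred, average="macro"):7.4f}')

It would be called as report_task('Maslow', ma_tr_true, ma_tr_pred, ma_v_true, ma_v_pred, ma_t_true, ma_t_pred), and likewise for Reiss.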
Example #24
File: train.py Project: sailfish009/DDANet
                                                           verbose=True)
    # loss_fn = nn.BCEWithLogitsLoss()
    loss_fn = DiceBCELoss()
    loss_name = "BCE Dice Loss"

    data_str = f"Hyperparameters:\nImage Size: {size}\nBatch Size: {batch_size}\nLR: {lr}\nEpochs: {num_epochs}\n"
    data_str += f"Optimizer: Adam\nLoss: {loss_name}\n"
    print_and_save(train_log, data_str)
    """ Training the model. """
    best_valid_loss = float('inf')

    for epoch in range(num_epochs):
        start_time = time.time()

        train_loss = train(model, train_loader, optimizer, loss_fn, device)
        valid_loss = evaluate(model, valid_loader, loss_fn, device)
        scheduler.step(valid_loss)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), checkpoint_path)

        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        data_str = f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s\n'
        data_str += f'\tTrain Loss: {train_loss:.3f}\n'
        data_str += f'\t Val. Loss: {valid_loss:.3f}\n'
        print_and_save(train_log, data_str)
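DiceBCELoss is imported from elsewhere in this project and its definition is not shown. A common formulation, given here as an assumed sketch rather than the project's exact implementation, sums BCE-with-logits and a soft Dice term over the flattened masks:

import torch
import torch.nn as nn
import torch.nn.functional as F

class DiceBCELoss(nn.Module):
    def __init__(self, smooth=1.0):
        super().__init__()
        self.smooth = smooth

    def forward(self, logits, targets):
        # Binary cross-entropy computed directly on the logits.
        bce = F.binary_cross_entropy_with_logits(logits, targets)
        # Soft Dice loss on the sigmoid probabilities.
        probs = torch.sigmoid(logits).view(-1)
        targets = targets.view(-1)
        intersection = (probs * targets).sum()
        dice = 1 - (2 * intersection + self.smooth) / (probs.sum() + targets.sum() + self.smooth)
        return bce + dice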
Example #25
            utils.SaveModel(m2p2_model, MODEL_DIR+'opt/', mod_weights)

        # train the reference models (slave procedure in alg 1)
        if WITH_HET_MODULE:
            for ref_epoch in range(utils.n_EPOCHS):
                _ = train.train_ref(m2p2_model, ref_model, cri_pers, tra_loader, ref_model_optim, False)
        # end of slave procedure

        # apply the trained reference models to get current concat weights
        tilde_mod_weights = train.train_ref(m2p2_model, ref_model, cri_pers, val_loader, ref_model_optim, True)
        # moving average: combine the current concat weights with the previous ones
        if WITH_HET_MODULE:
            utils.update_mod_weights(mod_weights, tilde_mod_weights)

        # gather information and print in verbose mode
        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)
        if epoch % 1 == 0 and VERBOSE:
            print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
            if WITH_HET_MODULE:
                print('modality weights', mod_weights)
            print(f'\tTrain alignment loss:{train_emb_loss:.5f}\tTrain persuasion loss:{train_pers_loss:.5f}')
            print(f'\tVal alignment loss:{val_emb_loss:.5f}\tVal persuasion loss:{val_pers_loss:.5f}')
    ##### end of training process (master procedure in alg 1) #####

else:
    ##### load pre-trained model and test #####
    mod_weights = utils.LoadModelDict(m2p2_model, PRETRAIN_MODEL_DIR)
    tes_emb_loss, tes_pers_loss = train.train_m2p2(m2p2_model, tes_loader, m2p2_optim, cri_align, cri_pers,
                                                   COSINE, mod_weights, GAMMA, evaluate=True)
    print('MSE:', round(tes_pers_loss, 3))
    ##### end of testing #####
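utils.update_mod_weights is a project utility whose code is not shown; the comment above only says it forms a moving average of the concat weights. A plausible sketch, assuming the modality weights are kept in a dict and that alpha is a fixed smoothing factor (both assumptions, not taken from the original code), is:

def update_mod_weights(mod_weights, tilde_mod_weights, alpha=0.9):
    # Exponential moving average, updated in place:
    # keep most of the previous weight and blend in the newly estimated one.
    for modality in mod_weights:
        mod_weights[modality] = alpha * mod_weights[modality] + (1 - alpha) * tilde_mod_weights[modality]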
Example #26
File: train.py Project: lin-justin/humor
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--model',
        type=str,
        default='rnn',
        help=
        "Available models are: 'rnn', 'cnn', 'bilstm', 'fasttext', and 'distilbert'\nDefault is 'rnn'"
    )
    parser.add_argument('--train_data_path',
                        type=str,
                        default="./data/train_clean.csv",
                        help="Path to the training data")
    parser.add_argument('--test_data_path',
                        type=str,
                        default="./data/dev_clean.csv",
                        help="Path to the test data")
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--vectors',
                        type=str,
                        default='fasttext.simple.300d',
                        help="Pretrained vectors; see "
                             "https://github.com/pytorch/text/blob/9ce7986ddeb5b47d9767a5299954195a1a5f9043/torchtext/vocab.py#L146 "
                             "for the available options")
    parser.add_argument('--max_vocab_size', type=int, default=750)
    parser.add_argument('--batch_size', type=int, default=32)
    # note: argparse with type=bool treats any non-empty string (even 'False') as True,
    # so this flag effectively keeps its default unless an empty string is passed
    parser.add_argument('--bidirectional', type=bool, default=True)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--hidden_dim', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=1)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_epochs', type=int, default=5)
    parser.add_argument('--n_filters', type=int, default=100)
    parser.add_argument('--filter_sizes', type=int, nargs='+', default=[3, 4, 5])

    args = parser.parse_args()

    torch.manual_seed(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ##########  BILSTM ##########

    if args.model == "bilstm":
        print('\nBiLSTM')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors,
                         unk_init=torch.Tensor.normal_)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)
        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        unk_idx = TEXT.vocab.stoi[TEXT.unk_token]

        model = BiLSTM(input_dim, embedding_dim, args.hidden_dim,
                       args.output_dim, args.n_layers, args.bidirectional,
                       args.dropout, pad_idx)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)
        model.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim)
        model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.139, Test Acc: 95.27%

    ##########  VANILLA RNN ##########

    else:
        print('\nVanilla RNN')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)

        model = RNN(input_dim, embedding_dim, args.hidden_dim, args.output_dim)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.138, Test Acc: 95.05%
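get_embedding_dim is defined elsewhere in this project. A plausible sketch, assuming it simply parses the dimension out of the torchtext vector name (for example 'fasttext.simple.300d' or 'glove.6B.100d'), is:

def get_embedding_dim(vectors_name):
    # Pretrained torchtext vector names end in '<dim>d', so take the last
    # dot-separated component and strip the trailing 'd'.
    return int(vectors_name.split('.')[-1].rstrip('d'))

print(get_embedding_dim('fasttext.simple.300d'))  # 300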
Example #27
    def train(self):
        print(
            f'The model has {self.model.count_parameters():,} trainable parameters'
        )

        best_valid_loss = float('inf')

        # apply the appropriate initialization method for the model
        if self.params.model == 'seq2seq':
            self.model.apply(init_weights)
        elif self.params.model == 'seq2seq_gru':
            self.model.apply(init_weights_gru)
        elif self.params.model == 'seq2seq_attention':
            self.model.apply(init_weights_attention)

        print(self.model)

        for epoch in range(self.params.num_epoch):
            self.model.train()

            epoch_loss = 0
            start_time = time.time()

            for batch in self.train_iter:
                # For each batch, first zero the gradients
                self.optimizer.zero_grad()
                sources, sources_lengths = batch.kor
                targets = batch.eng

                predictions = self.model(sources, sources_lengths, targets)
                # targets     = [target length, batch size]
                # predictions = [target length, batch size, output dim]

                # flatten the predictions and targets, since CrossEntropyLoss expects
                # 2D predictions with 1D targets; the 0-th token is dropped because it is <sos>
                targets = targets[1:].view(-1)
                predictions = predictions[1:].view(-1, predictions.shape[-1])

                # targets = [(target sentence length - 1) * batch size]
                # predictions = [(target sentence length - 1) * batch size, output dim]

                loss = self.criterion(predictions, targets)

                # backpropagate first, then clip the gradients to keep them from exploding
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.params.clip)
                self.optimizer.step()

                # item() extracts the Python scalar from a single-element tensor
                epoch_loss += loss.item()

            train_loss = epoch_loss / len(self.train_iter)
            valid_loss = self.evaluate()

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(self.model.state_dict(), self.params.save_model)

            print(
                f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}'
            )
            print(
                f'\tVal. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}'
            )
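The evaluate method called in this loop is not shown. A minimal sketch of the validation pass, written as a standalone function for clarity (in the class above it would use self.model, self.valid_iter, and self.criterion; valid_iter and the exact model call signature are assumptions), is:

import torch

def evaluate(model, valid_iter, criterion):
    # Same loss computation as the training loop, but without gradient tracking
    # and without parameter updates.
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for batch in valid_iter:
            sources, sources_lengths = batch.kor
            targets = batch.eng
            predictions = model(sources, sources_lengths, targets)
            loss = criterion(predictions[1:].view(-1, predictions.shape[-1]),
                             targets[1:].view(-1))
            epoch_loss += loss.item()
    return epoch_loss / len(valid_iter)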