def main() -> None:
    """Train and evaluate a Transformer-encoder sentiment classifier.

    Pipeline: load config, build train/test datasets (which also builds the
    vocabulary), create batch iterators, instantiate the model, then run the
    training loop via ``train``.
    """
    config = Config()
    processor = DataProcessor(config.vocab_size)

    # Building the training set also constructs/loads the vocabulary, so the
    # config is synced to the processor's actual vocabulary size afterwards
    # (and before the model — which sizes its embedding on it — is created).
    train_data, train_label = processor.get_datasets(config.train_pos_path,
                                                     config.train_neg_path,
                                                     config.vocab_path,
                                                     config.seq_len)
    config.vocab_size = processor.vocab_size  # actual vocabulary size

    test_data, test_label = processor.get_datasets(config.test_pos_path,
                                                   config.test_neg_path,
                                                   config.vocab_path,
                                                   config.seq_len)

    # Infinite-ish iterator over shuffled training batches for num_epochs.
    train_set_iters = processor.batch_iter(list(zip(train_data, train_label)),
                                           config.batch_size,
                                           config.num_epochs,
                                           shuffle=True)

    # Fixed (unshuffled) evaluation batches.
    test_batchs = processor.test_batchs(list(zip(test_data, test_label)),
                                        config.batch_size)

    model = TransformerEncoder(config)
    model.to(config.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    loss_func = nn.BCELoss()  # model presumably ends in sigmoid — BCELoss expects probabilities

    # Ceiling division in pure integer arithmetic; the previous
    # int((n - 1) / bs) + 1 went through float division, which can lose
    # precision for very large n. Identical result for all n >= 1.
    num_batches_per_epoch = (len(train_data) - 1) // config.batch_size + 1

    model = train(model, train_set_iters, test_batchs, optimizer, loss_func,
                  config.device, num_batches_per_epoch)


if __name__ == "__main__":
    main()