def test_forward_pass_token_classification(self):
    """Check the logits shape of a labelled LayoutLM token-classification pass."""
    num_labels = 13

    # The token classification head is randomly initialized.
    model = LayoutLMForTokenClassification.from_pretrained(
        "microsoft/layoutlm-base-uncased", num_labels=num_labels
    )
    model = model.to(torch_device)

    ids, mask, boxes, segment_ids, targets = prepare_layoutlm_batch_inputs()

    result = model(
        input_ids=ids,
        bbox=boxes,
        attention_mask=mask,
        token_type_ids=segment_ids,
        labels=targets,
    )

    # The loss (expected to be around 2.65) is deliberately not asserted:
    # it is currently somewhat random and can vary between 0.1-0.3 atol.

    # Only the shape of the logits is verified.
    self.assertEqual(result.logits.shape, torch.Size((2, 25, num_labels)))
def _load_model(self):
    """Load the LayoutLM token-classification model into ``self.model``.

    Reads ``self.config``, ``self.tokenizer`` and ``self.device``. The
    embedding matrix is resized to ``len(self.tokenizer)`` and the model is
    then moved to ``self.device``.
    """
    checkpoint = 'microsoft/layoutlm-base-uncased'
    self.model = LayoutLMForTokenClassification.from_pretrained(
        checkpoint, config=self.config
    )

    # Keep the embedding table in step with the tokenizer's vocabulary size.
    vocab_size = len(self.tokenizer)
    self.model.resize_token_embeddings(vocab_size)

    self.model.to(self.device)
def model_init():
    """Returns an initialized model for use in a Hugging Face Trainer.

    The model is ``LayoutLMForTokenClassification`` loaded from
    ``microsoft/layoutlm-base-uncased`` with 5 labels; ``from_tf=True`` is
    passed through to ``from_pretrained`` unchanged.

    The model is placed on CUDA when a CUDA device is available and stays on
    the CPU otherwise, instead of failing on machines without a GPU.

    Returns:
        The freshly loaded model, already moved to the selected device.
    """
    # Local import keeps this block self-contained regardless of what the
    # surrounding module imports at top level.
    import torch

    model = LayoutLMForTokenClassification.from_pretrained(
        "microsoft/layoutlm-base-uncased",
        num_labels=5,
        from_tf=True,
    )

    # A hard-coded 'cuda' raises on CPU-only hosts; fall back gracefully.
    target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    return model.to(target_device)
def __init__(self, image_path, model_path, config_path, num_labels=13, args=None):
    """Open the input image and load tokenizer, config and model.

    Args:
        image_path: Passed to ``openImage``; the result is kept as
            ``self.image``.
        model_path: Passed to ``LayoutLMForTokenClassification.from_pretrained``.
        config_path: Passed to ``LayoutLMConfig.from_pretrained``.
        num_labels: Accepted but not used anywhere in this constructor.
            NOTE(review): confirm whether it should be applied to the config.
        args: Stored untouched as ``self.args``.
    """
    super(LayoutLM, self).__init__()

    self.image = openImage(image_path)
    self.args = args

    self.tokenizer = LayoutLMTokenizer.from_pretrained(
        "microsoft/layoutlm-base-uncased"
    )

    layout_config = LayoutLMConfig.from_pretrained(config_path)
    self.model = LayoutLMForTokenClassification.from_pretrained(
        model_path, config=layout_config
    )
    # `device` is not defined in this block; it comes from the enclosing scope.
    self.model.to(device)

    # Placeholders, all unset at construction time.
    self.input_ids = self.attention_mask = self.token_type_ids = None
    self.bboxes = self.token_actual_boxes = None
def train(dataset_path,
          loader_type='combined_loader',
          batch_size=4,
          num_workers=16,
          dataset_rand_seq=True,
          dataset_rand_seq_prob=0.5,
          dataset_exlude_image_mask=True,
          state_dict_path=None,
          weight_path='weights/extract/',
          max_epoch=5,
          lr=0.001,
          valcheck_interval=2000,
          num_gpus=1,
          log_freq=100,
          resume_checkpoint_path=None,
          checkpoint_saved_path="checkpoints/v3/",
          logs_path="logs/v3/",
          prefix_name='layoutlm-v2',
          manual_seed=1261):
    """Fine-tune LayoutLM for token classification and save the final weights.

    Builds the tokenizer, the train/validation loaders and the model, wraps
    the model in ``TaskLayoutLM``, fits it with a PyTorch Lightning trainer
    and finally writes ``model.state_dict()`` to a file under ``weight_path``.

    Args:
        dataset_path: Dataset location handed to the loader factory; its last
            path component is also embedded in the saved weight file name.
        loader_type: ``'combined_loader'`` selects ``loader.get_loader``; any
            other value selects ``loader.get_base_loader``.
        batch_size: Forwarded to the loader factory.
        num_workers: Forwarded to the loader factory.
        dataset_rand_seq: Forwarded to the loader factory as ``rand_seq``.
        dataset_rand_seq_prob: Forwarded to the loader factory as
            ``rand_seq_prob``.
        dataset_exlude_image_mask: Forwarded to the loader factory as
            ``excluce_image_mask`` (spelling is the loader's keyword).
        state_dict_path: Optional path of a state dict that is loaded (on
            CPU) into the model before training.
        weight_path: Directory the final weights are written to. It is
            created if it does not exist.
        max_epoch: Forwarded to ``pl.Trainer`` as ``max_epochs``.
        lr: Not used by this function.
        valcheck_interval: Forwarded to ``pl.Trainer`` as
            ``val_check_interval``.
        num_gpus: Forwarded to ``pl.Trainer`` as ``gpus``.
        log_freq: Forwarded to ``pl.Trainer`` as ``log_every_n_steps``.
        resume_checkpoint_path: Forwarded to ``pl.Trainer`` as
            ``resume_from_checkpoint``.
        checkpoint_saved_path: Directory used by the ``ModelCheckpoint``
            callback.
        logs_path: Directory used by the TensorBoard logger.
        prefix_name: Prefix given to the ``ModelCheckpoint`` callback.
        manual_seed: Seed passed to ``pl.trainer.seed_everything``.

    Raises:
        KeyError: If ``trainer.logged_metrics`` lacks ``'val_acc'``,
            ``'val_loss'`` or ``'epoch'`` after fitting.
    """
    logging.basicConfig(level=logging.INFO)

    # Seed first. Previously this ran only after the model had been built,
    # so any random initialisation done before it could not be reproduced.
    pl.trainer.seed_everything(manual_seed)

    # load tokenizer
    logging.info("Load BertTokenizer with indobenchmark/indobert-base-p2")
    tokenizer = BertTokenizer.from_pretrained(
        "indobenchmark/indobert-base-p2",
        do_lower_case=True,
        cache_dir=None,
    )

    # Both loader factories take the same arguments, so only the factory and
    # the log line depend on `loader_type`.
    if loader_type == 'combined_loader':
        logging.info(f"Load Combined Loader with path {dataset_path}")
        build_loaders = loader.get_loader
    else:
        logging.info(f"Load Base Loader with path {dataset_path}")
        build_loaders = loader.get_base_loader
    train_loader, valid_loader = build_loaders(
        dataset_path,
        tokenizer=tokenizer,
        batch_size=batch_size,
        num_workers=num_workers,
        rand_seq=dataset_rand_seq,
        rand_seq_prob=dataset_rand_seq_prob,
        excluce_image_mask=dataset_exlude_image_mask,
    )

    logging.info("Load LayoutLMConfig for LayoutLMForTokenClassification")
    config = LayoutLMConfig.from_pretrained(
        "microsoft/layoutlm-base-uncased",
        num_labels=label_cfg.num_labels,
        cache_dir=None,
    )

    logging.info(
        "Load LayoutLMForTokenClassification from_pretrained microsoft/layoutlm-base-uncased"
    )
    model = LayoutLMForTokenClassification.from_pretrained(
        'microsoft/layoutlm-base-uncased',
        config=config,
    )
    # The tokenizer is not the LayoutLM one, so the embedding table is
    # resized to the tokenizer's length.
    model.resize_token_embeddings(len(tokenizer))

    if state_dict_path:
        logging.info(f"Load state_dict from path {state_dict_path}")
        state_dict = torch.load(state_dict_path,
                                map_location=torch.device("cpu"))
        model.load_state_dict(state_dict)

    # NOTE(review): the original has `model = model.to(device)` right after a
    # dangling `#`, and `device` is not defined in this function; it is
    # treated as commented out. Confirm against the unmangled file.
    # model = model.to(device)

    # prepare the task
    task = TaskLayoutLM(model, tokenizer)

    # Keep only the best checkpoint according to validation loss.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        dirpath=checkpoint_saved_path,
        save_top_k=1,
        verbose=True,
        monitor='val_loss',
        mode='min',
        prefix=prefix_name,
    )

    tb_logger = pl_loggers.TensorBoardLogger(logs_path)
    trainer = pl.Trainer(
        weights_summary="top",
        max_epochs=max_epoch,
        val_check_interval=valcheck_interval,
        gpus=num_gpus,
        log_every_n_steps=log_freq,
        deterministic=True,
        benchmark=True,
        logger=tb_logger,
        checkpoint_callback=checkpoint_callback,
        resume_from_checkpoint=resume_checkpoint_path,
    )
    trainer.fit(task, train_loader, valid_loader)

    # The last logged validation metrics end up in the weight file name.
    metrics = trainer.logged_metrics
    vacc, vloss = metrics['val_acc'], metrics['val_loss']
    last_epoch = metrics['epoch']

    dirname = Path(dataset_path).name
    filename = f'layoutlm_v2_ktp_{dirname}_vacc{vacc:.4}_vloss{vloss:.4}_epoch{last_epoch}_cli.pth'

    # Create the target directory so a missing folder cannot discard the
    # result of a finished training run.
    weight_dir = Path(weight_path)
    weight_dir.mkdir(parents=True, exist_ok=True)
    saved_filename = str(weight_dir.joinpath(filename))

    logging.info(f"Prepare to save training results to path {saved_filename}")
    torch.save(model.state_dict(), saved_filename)