# ---------------------------------------------------------------------------
# Train the baseline LSTM tagger for one epoch, save it, then evaluate it on
# the test split.
# ---------------------------------------------------------------------------
name_ = 'LSTM'

# Embeddings are looked up for the training vocabulary.
# NOTE(review): 300 is presumably the embedding dimension -- confirm against
# load_pretrained_embeddings.
pretrained_embeddings = load_pretrained_embeddings(
    embeddings_path,
    train_dataset.word2idx,
    300,
    is_crf=crf_model,
)

hp = HyperParameters(
    name_,
    train_dataset.word2idx,
    train_dataset.labels2idx,
    pretrained_embeddings,
    batch_size,
)

# One loader per split, all sharing the same batch size.
# NOTE: DatasetParser.pad_collate is not passed as collate_fn, so the loaders
# fall back to the DataLoader default collation.
# NOTE(review): the training loader is built without shuffle=True -- confirm
# that iterating the training data in a fixed order is intended.
train_dataset_, dev_dataset_, test_dataset_ = (
    DataLoader(dataset=split, batch_size=batch_size)
    for split in (train_dataset, dev_dataset, test_dataset)
)

model = BaselineModel(hp).to(train_dataset.get_device)

# The '<PAD>' label index is handed to the loss as ignore_index so padded
# positions do not contribute to it.
pad_label_idx = train_dataset.labels2idx['<PAD>']
criterion = CrossEntropyLoss(ignore_index=pad_label_idx)
adam_optimizer = Adam(model.parameters())

trainer = Trainer(
    model=model,
    loss_function=criterion,
    optimizer=adam_optimizer,
    batch_num=hp.batch_size,
    num_classes=hp.num_classes,
    verbose=True,
)

# The checkpoint file name is derived from the model's own name.
save_to_ = join(RESOURCES_PATH, f"{model.name}_model.pt")
trainer.train(train_dataset_, dev_dataset_, epochs=1, save_to=save_to_)

# Report performance on the held-out test split.
evaluator = Evaluator(model, test_dataset_, crf_model)
evaluator.check_performance(train_dataset.idx2label)
# ---------------------------------------------------------------------------
# Rebuild a model from a saved checkpoint, evaluate it on the test split and
# dump its predictions to 'preds.txt'.
# ---------------------------------------------------------------------------

# One loader per split, all sharing the same batch size. Only the test loader
# is consumed in this section; the other two are kept for any later code.
train_dataset_, dev_dataset_, test_dataset_ = (
    DataLoader(dataset=split, batch_size=batch_size)
    for split in (train_dataset, dev_dataset, test_dataset)
)

# Pretrained vectors are only loaded when PRETRAINED is truthy.
# NOTE(review): 300 is presumably the embedding dimension -- confirm against
# load_pretrained_embeddings.
embeddings_path = os.path.join(RESOURCES_PATH, 'wiki.en.vec')
if PRETRAINED:
    pretrained_embeddings = load_pretrained_embeddings(
        embeddings_path,
        train_dataset.word2idx,
        300,
        is_crf=CRF_MODEL,
    )
else:
    pretrained_embeddings = None

# Vocabulary and label mappings pickled alongside the saved model.
idx2label = load_pickle(
    os.path.join(RESOURCES_PATH,
                 'Stacked_BiLSTM_CRF_Fasttext_2315_idx2label.pkl'))
word2idx = load_pickle(
    os.path.join(RESOURCES_PATH,
                 'Stacked_BiLSTM_CRF_Fasttext_2315_word2idx.pkl'))

# NOTE(review): the embeddings above are built from train_dataset.word2idx,
# while the hyper-parameters use the pickled word2idx, and the label mapping
# passed here is train_dataset.idx2label rather than the pickled idx2label --
# confirm these mappings agree with each other.
hp = HyperParameters(
    name_,
    word2idx,
    train_dataset.idx2label,
    pretrained_embeddings,
    batch_size,
)

# Choose the architecture, move it to the dataset's device, then restore the
# saved weights from model_path.
model_cls = CRF_Model if CRF_MODEL else BaselineModel
model = model_cls(hp).to(train_dataset.get_device)
model.load_model(model_path)

evaluator = Evaluator(model, test_dataset_, CRF_MODEL)
evaluator.check_performance(idx2label)

# Predict on the test inputs and write one prediction per line.
tokens = test_dataset.data_x
preds_lst = model.predict_sentences(tokens, word2idx, idx2label)
with open('preds.txt', encoding='utf-8', mode='w+') as preds_file:
    preds_file.writelines(str(pred) + "\n" for pred in preds_lst)