print('Done...')

# Tag vocabulary. The reverse table is derived from the forward one so the
# two mappings cannot drift apart; contents and order match the literal
# {1: 'O', 2: 'PER', 3: 'LOC', 4: 'ORG'}.
tag2idx = {'O': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}
idx2tag = {index: tag for tag, index in tag2idx.items()}

# Load the two pickled lookup tables stored next to the weights.
# NOTE(review): pickle executes arbitrary code while loading, so these files
# must come from a trusted source.
with open('weights/char2idx_augmented.pkl', 'rb') as f:
    char2idx = pickle.load(f)
with open('weights/idx2char_augmented.pkl', 'rb') as f:
    idx2char = pickle.load(f)

# convert labels to indices
indexed_target_test = prepare_data.label_to_idx(target_test, char2idx)
indexed_target_word_test = prepare_data.word_to_idx(target_test, embeddings)

# Pair each test feature with its indexed target.
test_data = prepare_data.combine_data(
    features_test,
    indexed_target_test,
)

# initialize the Encoder
# Its first argument is read from the test features themselves:
# size(1) of the first feature tensor.
encoder = Encoder(
    features_test[0].size(1),
    encoder_hidden_size,
    encoder_layers,
    batch_size,
    device,
)
encoder = encoder.to(device)

# initialize the Decoder
# The fifth argument is the size of char2idx plus one.
# NOTE(review): the extra slot is presumably reserved for a padding/blank
# index -- confirm against the Decoder definition.
decoder = Decoder(
    embedding_dim_chars,
    encoder_hidden_size,
    attention_hidden_size,
    num_filters,
    len(char2idx)+1,
    decoder_layers,
    encoder_layers,
    batch_size,
    attention_type,
    device,
)
decoder = decoder.to(device)

# load the model
# The checkpoint is deserialized onto the CPU, then its 'encoder' and
# 'decoder' entries are loaded into the two modules built above.
checkpoint = torch.load(
    'weights/parliament/state_dict_21.pt',
    map_location=torch.device('cpu'),
)
encoder.load_state_dict(checkpoint['encoder'])
decoder.load_state_dict(checkpoint['decoder'])
# pickle.dump(idx2char, f, protocol=pickle.HIGHEST_PROTOCOL)

# used for normalized
# NOTE(review): presumably these are the vocabularies for the normalized
# (non-augmented) data -- confirm.
# Tag vocabulary. The reverse table is derived from the forward one so the
# two mappings cannot drift apart; contents and order match the literal
# {1: 'O', 2: 'PER', 3: 'LOC', 4: 'ORG'}.
tag2idx = {'O': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}
idx2tag = {index: tag for tag, index in tag2idx.items()}

# Load the two pickled lookup tables stored next to the weights.
# NOTE(review): pickle executes arbitrary code while loading, so these files
# must come from a trusted source.
with open('weights/char2idx.pkl', 'rb') as f:
    char2idx = pickle.load(f)
with open('weights/idx2char.pkl', 'rb') as f:
    idx2char = pickle.load(f)

# convert labels to indices
# Call order (train before dev, one helper at a time) is kept exactly as in
# the original in case the prepare_data helpers keep state.
indexed_target_train = prepare_data.label_to_idx(target_train, char2idx)
indexed_target_dev = prepare_data.label_to_idx(target_dev, char2idx)

indexed_target_word_train = prepare_data.word_to_idx(target_train, embeddings)
indexed_target_word_dev = prepare_data.word_to_idx(target_dev, embeddings)

indexed_tags_train = prepare_data.tag_to_idx(tags_train, tag2idx)
indexed_tags_dev = prepare_data.tag_to_idx(tags_dev, tag2idx)

# combine features and labels in a tuple
train_data = prepare_data.combine_data(
    features_train,
    indexed_target_train,
    indexed_target_word_train,
    indexed_tags_train,
)
dev_data = prepare_data.combine_data(
    features_dev,
    indexed_target_dev,
    indexed_target_word_dev,
    indexed_tags_dev,
)

# remove extra data that doesn't fit in batch
train_data = prepare_data.remove_extra(train_data, batch_size)
dev_data = prepare_data.remove_extra(dev_data, batch_size)