# Evaluate the trained tagger on the test set: tag-level precision/recall,
# where the 'O' (outside) tag is excluded from both the detected and the
# gold counts.
args.vocab_size, args.embedding_size = embLoader.get_dimensions()
model = NN(vocab_size=args.vocab_size,
           number_tags=dataset.get_num_tags(),
           batch_size=args.batch_size,
           gpu=args.gpu,
           window_size=args.window_size)
model.load_state_dict(torch.load(args.trained_model))
if args.gpu and torch.cuda.is_available():
    model.cuda()

tag_level = {'correct': 0, 'total_in_test': 0, 'total_detected': 0}
# The 'O' index is loop-invariant; look it up once instead of per token.
o_index = dataset.tag_to_index('O')

for words_input, tags in test_loader:
    input_tensor, target_tensor = model.get_train_tensors(
        words_input, tags, embLoader.word_to_index, dataset.tag_to_index)
    output = model(Variable(input_tensor))
    top_n, top_i = output.data.topk(1)
    # Inner index renamed to `j`: the original reused `i` and shadowed the
    # batch loop variable.
    for j, tag_i in enumerate(top_i):
        tag_i = tag_i[0]
        if tag_i == target_tensor[j] and target_tensor[j] != o_index:
            # Correct prediction of a real (non-'O') tag.
            tag_level['correct'] += 1
            tag_level['total_detected'] += 1
        elif tag_i != o_index:
            # Model claimed a real tag (right or wrong) -> counts as detected.
            tag_level['total_detected'] += 1
        if target_tensor[j] != o_index:
            # Gold tag is a real tag -> counts toward recall's denominator.
            tag_level['total_in_test'] += 1

# Guard against zero denominators (e.g. the model predicts only 'O', or the
# test set contains only 'O'); the original raised ZeroDivisionError there.
tag_level['precision'] = (tag_level['correct'] / tag_level['total_detected']
                          if tag_level['total_detected'] else 0.0)
tag_level['recall'] = (tag_level['correct'] / tag_level['total_in_test']
                       if tag_level['total_in_test'] else 0.0)
# Tag the test set with the trained model and write one "word gold predicted"
# triple per token to the output file.
model = NN(vocab_size=args.vocab_size,
           number_tags=dataset.get_num_tags(),
           batch_size=args.batch_size,
           gpu=args.gpu,
           window_size=args.window_size,
           include_cap=args.capitalization,
           number_suffix=dataset.get_num_suffix(),
           embedding_dim=args.embedding_size,
           activation=args.activation)
model.load_state_dict(torch.load(args.trained_model))
if args.gpu and torch.cuda.is_available():
    model.cuda()

# Prefer the pre-trained embedding vocabulary when one was supplied;
# otherwise fall back to the vocabulary built from the dataset.
if args.embeddings_path is not None:
    word_to_index = embLoader.word_to_index
else:
    word_to_index = dataset.word_to_index

# Context manager guarantees the file is closed even if tagging raises
# mid-loop (the original's bare open/close would leak the handle).
with open(args.output_file, "w") as tagged_file:
    for words_input, tags in test_loader:
        input_tensor, target_tensor, cap_tensor, suf_tensor = \
            model.get_train_tensors(words_input, tags, word_to_index,
                                    dataset.tag_to_index,
                                    suffix2id=dataset.suffix_to_index)
        output = model(Variable(input_tensor), Variable(cap_tensor),
                       Variable(suf_tensor))
        top_n, top_i = output.data.topk(1)
        # Inner index renamed to `j`: the original reused `i` and shadowed
        # the batch loop variable.
        for j, tag_i in enumerate(top_i):
            # Center word of the context window for the j-th batch example.
            word = words_input[args.window_size][j]
            real_tag = dataset.index_to_tag(target_tensor[j])
            predicted_tag = dataset.index_to_tag(tag_i[0])
            tagged_file.write("{} {} {}\n".format(word, real_tag,
                                                  predicted_tag))
gpu=args.gpu, window_size=args.window_size, number_suffix=dataset.get_num_suffix(), include_cap=args.capitalization) model.load_state_dict(torch.load(args.trained_model)) if args.embeddings_path is not None: embLoader = EmbeddingsLoader(args.embeddings_path) print("Embeddings loaded, vocab size: {} embedding dims: {}".format( embLoader.get_dimensions()[0], embLoader.get_dimensions()[1])) if args.gpu and torch.cuda.is_available(): model.cuda() while True: sentence = input("Enter sentence to process: ") word_list = [[w] for w in sentence.split()] for i in range(len(word_list) - (2 * args.window_size)): input_tensor, _, cap_tensor, suf_tensor = model.get_train_tensors( word_list[i:i + 1 + 2 * args.window_size], word2id=embLoader.word_to_index, suffix2id=dataset.suffix_to_index) output = model(Variable(input_tensor), Variable(cap_tensor), Variable(suf_tensor)) top_n, top_i = output.data.topk(1) tag_i = top_i[0][0] print("{} : {}".format( word_list[i:i + 1 + 2 * args.window_size][args.window_size], dataset.index_to_tag(tag_i)))