Exemplo n.º 1
0
def test_trellis_score():
    """Check the best-path score returned by ``viterbi.build_trellis``.

    Emission and transition weights are computed once from the module-level
    fixtures via ``hmm.compute_weights_variables``; two sentences are then
    decoded and the returned path score is compared with its expected value.
    """
    global nb_weights, hmm_trans_weights, tag_to_ix, all_tags, vocab, word_to_ix

    emission_probs, tag_transition_probs = hmm.compute_weights_variables(
        nb_weights, hmm_trans_weights, vocab, word_to_ix, tag_to_ix)

    # (sentence text, expected score of the best path)
    scenarios = (
        ("they can can fish", -17),
        ("they can can can can fish", -25.),
    )
    for text, expected_score in scenarios:
        sentence = text.split()
        # One emission entry per token, looked up through the word index.
        path_score, best_path = viterbi.build_trellis(
            all_tags, tag_to_ix,
            [emission_probs[word_to_ix[w]] for w in sentence],
            tag_transition_probs)

        eq_(path_score.data.numpy(), expected_score)
Exemplo n.º 2
0
 def predict(self, sentence):
     """
     Predict the tags for a sentence by running the viterbi algorithm.

     The output of ``self.forward(sentence)`` is reshaped to one row per
     element of ``sentence`` and handed to ``viterbi.build_trellis`` together
     with ``self.transitions``. The rows are presumably the per-tag emission
     potentials of each token -- confirm against ``forward``.
     Inputs:
     - sentence: the input forwarded to ``self.forward``; must support ``len()``.
     :returns:
     - the best path produced by ``viterbi.build_trellis`` (documented
       originally as a sequence of tags); the accompanying score is discarded.
     """
     lstm_feats = self.forward(sentence).view(len(sentence), -1)
     # Iterating the mapping yields its keys (the tags) in the same order
     # that iterating its items would.
     all_tags = list(self.tag_to_ix)
     _, path = viterbi.build_trellis(all_tags, self.tag_to_ix,
                                     lstm_feats, self.transitions)
     return path
Exemplo n.º 3
0
def test_hmm_on_example_sentence():
    """Decode a five-token example sentence and check its score and tags.

    A tag index is built from ``all_tags``, the vocabulary is loaded from
    ``TRAIN_FILE``, HMM weights are computed, and the result of
    ``viterbi.build_trellis`` is compared with the expected score (to two
    decimal places) and the expected tag sequence.
    """
    global nb_weights, hmm_trans_weights, all_tags

    # Each tag receives the current size of the mapping as its index.
    tag_index = {}
    for label in list(all_tags):
        tag_index[label] = len(tag_index)

    vocab, word_to_ix = most_common.get_word_to_ix(TRAIN_FILE)
    emission_probs, tag_transition_probs = hmm.compute_weights_variables(
        nb_weights, hmm_trans_weights, vocab, word_to_ix, tag_index)

    tokens = ['they', 'can', 'can', 'fish', '.']
    token_emissions = [emission_probs[word_to_ix[token]] for token in tokens]
    score, pred_tags = viterbi.build_trellis(
        all_tags, tag_index, token_emissions, tag_transition_probs)

    assert_almost_equal(score.data.numpy()[0], -32.4456, places=2)
    eq_(pred_tags, ['PRON', 'AUX', 'AUX', 'NOUN', 'PUNCT'])
Exemplo n.º 4
0
def test_hmm_on_example_sentence():
    """Run the HMM tagger on 'they can can fish .' and verify the output.

    Checks that the score returned by ``viterbi.build_trellis`` is about
    -32.4456 (two decimal places) and that the predicted tags match the
    expected sequence.
    """
    global nb_weights, hmm_trans_weights, all_tags

    example_words = ['they', 'can', 'can', 'fish', '.']
    expected_tags = ['PRON', 'AUX', 'AUX', 'NOUN', 'PUNCT']

    # Assign indices in iteration order: the next index is the mapping size.
    ix_of_tag = {}
    for name in list(all_tags):
        ix_of_tag[name] = len(ix_of_tag)

    vocab, word_to_ix = most_common.get_word_to_ix(TRAIN_FILE)
    emission_probs, tag_transition_probs = hmm.compute_weights_variables(
        nb_weights, hmm_trans_weights, vocab, word_to_ix, ix_of_tag)

    per_word_scores = []
    for word in example_words:
        per_word_scores.append(emission_probs[word_to_ix[word]])

    score, pred_tags = viterbi.build_trellis(
        all_tags, ix_of_tag, per_word_scores, tag_transition_probs)

    assert_almost_equal(score.data.numpy()[0], -32.4456, places=2)
    eq_(pred_tags, expected_tags)
Exemplo n.º 5
0
def test_build_trellis():
    """Build a trellis for 'they can can fish' and spot-check its cells.

    Three entries of the shared ``hand_weights`` are overwritten in place
    before the trellis is built. The test then expects four trellis columns,
    two entries in the last column, and specific values for four cells
    (each apparently a ``(score, previous tag)`` pair -- confirm against
    ``viterbi.build_trellis``).
    """
    global hand_weights

    tokens = "they can can fish".split()
    tagset = ['NOUN', 'VERB']

    # modify the hand weights so you can't read off the answer to 3.1 :)
    overrides = (
        (('NOUN', 'they', EMIT), -2),
        (('VERB', 'fish', EMIT), -5),
        (('VERB', 'VERB', TRANS), -2),
    )
    for feature, weight in overrides:
        hand_weights[feature] = weight

    trellis = viterbi.build_trellis(tokens, hmm.hmm_features, hand_weights,
                                    tagset)

    eq_(len(trellis), 4)
    eq_(len(trellis[-1]), 2)

    # (column index, tag, expected cell)
    expected_cells = (
        (-1, 'VERB', (-18, 'VERB')),
        (-2, 'NOUN', (-11, 'VERB')),
        (1, 'VERB', (-7, 'NOUN')),
        (1, 'NOUN', (-11, 'NOUN')),
    )
    for column, tag, cell in expected_cells:
        eq_(trellis[column][tag], cell)
Exemplo n.º 6
0
def test_build_trellis():
    """Verify trellis dimensions and selected cells for a four-word sentence.

    The shared ``hand_weights`` mapping is modified in place (three entries)
    and then passed to ``viterbi.build_trellis`` with the NOUN/VERB tag list.
    """
    global hand_weights

    words = "they can can fish".split()
    candidate_tags = ['NOUN', 'VERB']

    # modify the hand weights so you can't read off the answer to 3.1 :)
    adjusted = {
        ('NOUN', 'they', EMIT): -2,
        ('VERB', 'fish', EMIT): -5,
        ('VERB', 'VERB', TRANS): -2,
    }
    for key in adjusted:
        hand_weights[key] = adjusted[key]

    trellis = viterbi.build_trellis(
        words, hmm.hmm_features, hand_weights, candidate_tags)

    # Expect four columns, and two entries in the final column.
    eq_(len(trellis), 4)
    final_column = trellis[-1]
    eq_(len(final_column), 2)

    eq_(final_column['VERB'], (-18, 'VERB'))
    eq_(trellis[-2]['NOUN'], (-11, 'VERB'))
    second_column = trellis[1]
    eq_(second_column['VERB'], (-7, 'NOUN'))
    eq_(second_column['NOUN'], (-11, 'NOUN'))