示例#1
0
    def inference(self, X, w):
        """Decode the best label sequence for ``X`` with Viterbi.

        The weight row ``w[0, :]`` is unpacked, via the index helpers on
        ``self``, into four score tables (start, end, transition, emission).
        Those tables are passed to ``run_viterbi`` from viterbi.py.

        Only the decoded sequence is returned; the score that
        ``run_viterbi`` reports alongside it is discarded.
        """
        from viterbi import run_viterbi
        num_labels = self.num_classes
        num_positions = len(X)
        start_scores = np.zeros(num_labels)
        end_scores = np.zeros(num_labels)
        trans_scores = np.zeros((num_labels, num_labels))
        emission_scores = np.zeros((num_positions, num_labels))

        # Copy the relevant weights into the score tables, one label at a time.
        for label in xrange(num_labels):
            start_scores[label] = w[0, self.get_start_trans_idx(label)]
            end_scores[label] = w[0, self.get_end_trans_idx(label)]

            # Transition scores for every pair that has `label` first.
            for other in xrange(num_labels):
                trans_scores[label, other] = w[0, self.get_trans_idx(label,
                                                                     other)]

            # Emission score of a position = sum of the weights of the
            # features listed in X[position], paired with this label.
            for position in xrange(num_positions):
                emission_scores[position, label] = sum(
                    (w[0, self.get_ftr_idx(fidx, label)]
                     for fidx in X[position]), 0.0)

        # Decode; the first element of the returned pair is not needed.
        _, yhat = run_viterbi(emission_scores, trans_scores, start_scores,
                              end_scores)
        return yhat
    def task(self):
        """Build a column-chart description comparing two robot data sets.

        For "robot_no_momentum.data" and then "robot_with_momentum.data",
        an HMM is trained with ``train_hmm_from_data`` and evaluated with
        ``run_viterbi``. The plotted value for each data set is one minus
        what ``run_viterbi`` returns (presumably an error rate, hence the
        "Fraction Correct" axis title -- confirm against run_viterbi).

        Returns the chart as a plain nested dict.
        """
        fractions_correct = []
        for data_filename in ("robot_no_momentum.data",
                              "robot_with_momentum.data"):
            model, dataset = train_hmm_from_data(data_filename)
            fractions_correct.append(1 - run_viterbi(model, dataset))

        labels = ["Without momentum", "With momentum"]
        return {
            "chart": {"defaultSeriesType": "column"},
            "xAxis": {"categories": labels},
            "yAxis": {"title": {"text": "Fraction Correct"}},
            "title": {"text": "HMM performance on inferring robot location."},
            "series": [{"name": "Test set performance",
                        "data": fractions_correct}],
        }
示例#3
0
def main():
    """Evaluate the stored CNN on the testing set, with and without Viterbi.

    Steps, in order:
      1. Load the "testing" data set and the saved CNN model.
      2. Write the CNN predictions to V.VITERBI_PREDICTION_PATH_TESTING.
      3. Call run_viterbi() and write its result to V.VITERBI_RESULT_TESTING.
      4. Build one row per window: [actual label, CNN label,
         Viterbi result - 1], drop rows whose first actual-label column is
         -100, and write the table to V.PREDICTION_RESULT_TESTING.
      5. Pass the table to produce_statistics_json() and visualize().
    """
    def read_matrix(path):
        # Each array is re-read from the CSV that was just written, so later
        # steps work on exactly the values stored on disk.
        # `.values` replaces DataFrame.as_matrix(), which newer pandas
        # releases no longer provide; the raw string keeps the separator
        # regex (a literal comma) free of invalid-escape warnings.
        return pd.read_csv(path, header=None, sep=r'\,',
                           engine='python').values

    # Arguments for get_data_set:
    # data type, generate new windows, oversampling, viterbi training
    DATA_TYPE = "testing"
    GENERATE_NEW_WINDOWS = True
    OVERSAMPLING = False
    VITERBI = False
    data_set = get_data_set(DATA_TYPE, GENERATE_NEW_WINDOWS, OVERSAMPLING,
                            VITERBI)

    # Create the network and restore its saved state.
    cnn = Convolutional_Neural_Network()
    cnn.set_data_set(data_set)
    cnn.load_model()

    actual = data_set._labels
    cnn_result = cnn.get_predictions()
    np.savetxt(V.VITERBI_PREDICTION_PATH_TESTING, cnn_result, delimiter=",")
    cnn_result = read_matrix(V.VITERBI_PREDICTION_PATH_TESTING)

    # run_viterbi() takes no arguments here; presumably it reads the
    # prediction file written just above -- confirm against its definition.
    viterbi_result = run_viterbi()
    np.savetxt(V.VITERBI_RESULT_TESTING, viterbi_result, delimiter=",")
    viterbi_result = read_matrix(V.VITERBI_RESULT_TESTING)

    # Collect, per window, the actual label, the CNN label and the Viterbi
    # label. Actual and CNN labels are the index of the largest entry of
    # their row; the Viterbi value is shifted down by one (presumably to
    # turn a 1-based label into a 0-based one -- confirm).
    result = np.zeros((len(cnn_result), 3))
    for i in range(len(cnn_result)):
        a = np.argmax(actual[i])
        c = np.argmax(cnn_result[i])
        v = viterbi_result[i] - 1
        result[i] = [a, c, v]

    # Remove activities labelled as -100 (activities such as shuffling,
    # transition, ...). See data.py
    boolean_actual = np.invert(actual[:, 0] == -100).T
    result = result[boolean_actual]

    np.savetxt(V.PREDICTION_RESULT_TESTING, result, delimiter=",")
    result = read_matrix(V.PREDICTION_RESULT_TESTING)

    produce_statistics_json(result)

    visualize(result)
示例#4
0
    def inference(self, X, w):
        """Return the Viterbi-decoded label sequence for ``X`` under ``w``.

        ``self.load_weights`` turns the CRF weights into the start, end,
        transition and emission score arrays. Those are then passed to the
        general-purpose ``run_viterbi`` routine from viterbi.py.

        The score returned by ``run_viterbi`` is dropped; only the best
        sequence, y, is handed back.
        """
        from viterbi import run_viterbi
        score_tables = self.load_weights(w, self.num_classes, X)
        start_scores, end_scores, trans_scores, emission_scores = score_tables

        # run_viterbi takes the emission table first, then the transitions,
        # then the start and end scores.
        _, yhat = run_viterbi(emission_scores, trans_scores, start_scores,
                              end_scores)
        return yhat
示例#5
0
def main():
    """Run the stored CNN and the Viterbi step on the "predicting" data set.

    Loads the data set and the saved model, asks the CNN for predictions,
    calls run_viterbi(), and finally prints the path held in
    V.VITERBI_RESULT_PREDICTING.
    """
    # Arguments for get_data_set:
    # data type, generate new windows, oversampling, viterbi training
    DATA_TYPE = "predicting"
    GENERATE_NEW_WINDOWS = True
    OVERSAMPLING = False
    VITERBI = False
    data_set = get_data_set(DATA_TYPE, GENERATE_NEW_WINDOWS, OVERSAMPLING,
                            VITERBI)

    # Create the network and restore its saved state.
    cnn = Convolutional_Neural_Network()
    cnn.set_data_set(data_set)
    cnn.load_model()

    # The return values of the next two calls are not used in this function.
    # The calls are kept because they may write the files the pipeline
    # relies on -- confirm against get_predictions / run_viterbi.
    cnn.get_predictions()
    run_viterbi()

    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('Prediction saved at path %s' % V.VITERBI_RESULT_PREDICTING)
def run_viterbi_test():
    """A simple randomized tester for the Viterbi algorithm.

    For each of ``num_tests`` rounds this function draws random emission,
    transition, start and end scores, finds the best label sequence by brute
    force over all ``L ** N`` sequences, and compares it with the sequence
    and score returned by ``run_viterbi``.

    The pass rates are printed as percentages.

    Raises:
        AssertionError: if any round disagrees on the sequence, or on the
            score by ``tolerance`` or more.
    """
    from viterbi import run_viterbi
    from numpy import random
    import numpy as np
    from itertools import product

    maxN = 7  # maximum length of a sentence (min is 1)
    maxL = 4  # maximum number of labels (min is 2)
    num_tests = 1000  # number of sentences to generate
    random.seed(0)  # fixed seed so every run sees the same test cases
    tolerance = 1e-5  # how close do the scores have to be?

    # Both values are passed as the `scale` (standard deviation) argument of
    # random.normal; with 1.0 the variance and standard deviation coincide.
    emission_var = 1.0  # spread of the gaussian generating emission scores
    trans_var = 1.0  # spread of the gaussian generating transition scores

    passed_y = 0  # how many times the correct sequence was predicted
    passed_s = 0  # how many times the correct score was returned

    for _ in range(num_tests):
        N = random.randint(1, maxN + 1)
        L = random.randint(2, maxL + 1)

        # Generate the scores
        emission_scores = random.normal(0.0, emission_var, (N, L))
        trans_scores = random.normal(0.0, trans_var, (L, L))
        start_scores = random.normal(0.0, trans_var, L)
        end_scores = random.normal(0.0, trans_var, L)

        # run viterbi
        viterbi_s, viterbi_y = run_viterbi(emission_scores, trans_scores,
                                           start_scores, end_scores)

        # compute the best sequence and score by exhaustive search
        best_y = []
        best_s = -np.inf
        for y in product(range(L), repeat=N):  # all possible ys
            # score = start + transitions/emissions along the path + end
            score = 0.0
            score += start_scores[y[0]]
            for i in range(N - 1):
                score += trans_scores[y[i], y[i + 1]]
                score += emission_scores[i, y[i]]
            score += emission_scores[N - 1, y[N - 1]]
            score += end_scores[y[N - 1]]
            # update the best
            if score > best_s:
                best_s = score
                best_y = list(y)

        # mismatch if any label prediction doesn't match
        if all(viterbi_y[i] == best_y[i] for i in range(len(best_y))):
            passed_y += 1
        # the scores should also be very close
        if abs(viterbi_s - best_s) < tolerance:
            passed_s += 1

    # One pre-formatted argument per print keeps the output identical under
    # both the Python 2 print statement and the Python 3 print function.
    print("Passed(y) %s" % (passed_y * 100.0 / num_tests))
    print("Passed(s) %s" % (passed_s * 100.0 / num_tests))
    assert passed_y == num_tests
    assert passed_s == num_tests
示例#7
0
    def viterbi_tags(self, logits: torch.Tensor,
                     mask: torch.Tensor) -> List[Tuple[List[int], float]]:
        """
        Uses viterbi algorithm to find most likely tags for the given inputs.
        If constraints are applied, disallows all other transitions.

        Parameters
        ----------
        logits : 3-D tensor. Its last two sizes are read as
            ``max_seq_length`` and ``num_tags``, and it is iterated along
            its first dimension, one sequence per entry.
        mask : tensor iterated in lockstep with ``logits``. The sum of each
            entry is used as that sequence's length (presumably 1 for real
            tokens and 0 for trailing padding -- TODO confirm).

        Returns
        -------
        A list with one ``(viterbi_path, viterbi_score)`` tuple per sequence,
        built from the ``(score, path)`` pair that ``viterbi.run_viterbi``
        returns.
        """
        _, max_seq_length, num_tags = logits.size()

        # Get the tensors out of the variables
        logits, mask = logits.data, mask.data

        # Augment transitions matrix with start and end transitions
        # Two extra indices are appended after the real tags: START, then END.
        start_tag = num_tags
        end_tag = num_tags + 1
        # Every entry starts at -10000 and is overwritten below where a
        # transition is defined.
        transitions = torch.Tensor(num_tags + 2, num_tags + 2).fill_(-10000.)

        # Apply transition constraints
        # For a 0/1 constraint mask this keeps the learned score where the
        # mask is 1 and substitutes -10000 where it is 0.
        constrained_transitions = (
            self.transitions * self._constraint_mask[:num_tags, :num_tags] +
            -10000.0 * (1 - self._constraint_mask[:num_tags, :num_tags]))
        transitions[:num_tags, :num_tags] = constrained_transitions.data

        if self.include_start_end_transitions:
            # Learned start/end scores, masked the same way as above.
            transitions[start_tag, :num_tags] = (
                self.start_transitions.detach() *
                self._constraint_mask[start_tag, :num_tags].data + -10000.0 *
                (1 - self._constraint_mask[start_tag, :num_tags].detach()))
            transitions[:num_tags, end_tag] = (
                self.end_transitions.detach() *
                self._constraint_mask[:num_tags, end_tag].data + -10000.0 *
                (1 - self._constraint_mask[:num_tags, end_tag].detach()))
        else:
            # No learned start/end scores: the entry is 0 where the mask is 1
            # and -10000 where it is 0.
            transitions[start_tag, :num_tags] = (
                -10000.0 *
                (1 - self._constraint_mask[start_tag, :num_tags].detach()))
            transitions[:num_tags, end_tag] = -10000.0 * (
                1 - self._constraint_mask[:num_tags, end_tag].detach())

        # From here on the transition scores are a NumPy array.
        transitions = transitions.cpu().numpy()
        best_paths = []
        # Pad the max sequence length by 2 to account for start_tag + end_tag.
        # This buffer is allocated once and refilled for every sequence.
        tag_sequence = torch.Tensor(max_seq_length + 2, num_tags + 2)

        for prediction, prediction_mask in zip(logits, mask):
            # NOTE: this is a tensor, not a Python int; it is used directly
            # in the slice bounds and indices below.
            sequence_length = torch.sum(prediction_mask)

            # Start with everything totally unlikely
            tag_sequence.fill_(-10000.)
            # At timestep 0 we must have the START_TAG
            tag_sequence[0, start_tag] = 0.
            # At steps 1, ..., sequence_length we just use the incoming prediction
            tag_sequence[1:(sequence_length +
                            1), :num_tags] = prediction[:sequence_length]
            # And at the last timestep we must have the END_TAG
            tag_sequence[sequence_length + 1, end_tag] = 0.

            # We pass the tags and the transitions to ``run_viterbi``.
            # Only the inner rows and the real-tag columns are passed on: the
            # START/END rows and columns written above are sliced away by
            # ``[1:-1, :num_tags]``, and the start/end scores are supplied
            # separately as the START row and END column of ``transitions``.
            target_tag_sequence = tag_sequence[:(sequence_length +
                                                 2)].cpu().numpy()
            viterbi_score, viterbi_path =\
                viterbi.run_viterbi(target_tag_sequence[1:-1, :num_tags], transitions[:num_tags, :num_tags], transitions[start_tag, :num_tags], transitions[:num_tags, end_tag])
            # Stored as (path, score), the reverse of the order returned.
            best_paths.append((viterbi_path, viterbi_score))
        return best_paths
示例#8
0
def run_viterbi_test():
    """A fixed-input tester for the Viterbi algorithm.

    Uses one hand-written problem with 2 positions and 3 labels. The best
    sequence is found by brute force over all ``L ** N`` label sequences and
    compared, together with its score, against what ``run_viterbi`` returns.

    The Viterbi result, the brute-force result, both scores and the pass
    rates are printed.

    Raises:
        AssertionError: if the sequences differ, or the scores differ by
            ``tolerance`` or more.
    """
    from viterbi import run_viterbi
    from numpy import random
    # Imported locally so the function does not depend on a module-level
    # `np` being present.
    import numpy as np
    from itertools import product

    num_tests = 1  # number of sentences to check
    # The inputs below are fixed, so the seed does not affect them; the call
    # is retained because it resets NumPy's global random state as before.
    random.seed(0)
    tolerance = 1e-5  # how close do the scores have to be?

    passed_y = 0  # how many times the correct sequence was predicted
    passed_s = 0  # how many times the correct score was returned

    for _ in range(num_tests):
        N = 2  # sentence length
        L = 3  # number of labels

        # Hand-written scores: emissions are (N, L), transitions are (L, L).
        emission_scores = np.array([[0.1, 0.3, 0.25], [0.2, 0.45, 0.31]])
        trans_scores = np.array([[0.3, 0.2, 0.5], [0.12, 0.4, 0.3],
                                 [0.1, 0.6, 0.5]])
        start_scores = np.array([0.2, 0.5, 0.7])
        end_scores = np.array([0.5, 0.4, 0.1])

        # run viterbi
        viterbi_s, viterbi_y = run_viterbi(emission_scores, trans_scores,
                                           start_scores, end_scores)
        # Pre-formatted single arguments print identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Viterbi %s %s" % (viterbi_s, viterbi_y))

        # compute the best sequence and score by exhaustive search
        best_y = []
        best_s = -np.inf
        for y in product(range(L), repeat=N):  # all possible ys
            # score = start + transitions/emissions along the path + end
            score = 0.0
            score += start_scores[y[0]]
            for i in range(N - 1):
                score += trans_scores[y[i], y[i + 1]]
                score += emission_scores[i, y[i]]
            score += emission_scores[N - 1, y[N - 1]]
            score += end_scores[y[N - 1]]
            # update the best
            if score > best_s:
                best_s = score
                best_y = list(y)
        print("Brute %s %s" % (best_s, best_y))

        # mismatch if any label prediction doesn't match
        if all(viterbi_y[i] == best_y[i] for i in range(len(best_y))):
            passed_y += 1
        # the scores should also be very close
        print("scores: ")
        print("%s %s" % (viterbi_s, best_s))
        if abs(viterbi_s - best_s) < tolerance:
            passed_s += 1

    print("Passed(y) %s" % (passed_y * 100.0 / num_tests))
    print("Passed(s) %s" % (passed_s * 100.0 / num_tests))
    assert passed_y == num_tests
    assert passed_s == num_tests
 def test_small_robot_dataset(self):
     """Check the value run_viterbi returns for the small robot data set.

     An HMM is trained on the normalized path of "robot_small.data"; the
     value returned by run_viterbi for it must be approximately 2/9.
     """
     path = normalize_filename("robot_small.data")
     model, dataset = train_hmm_from_data(path)
     observed = run_viterbi(model, dataset, True)
     self.assertAlmostEqual(observed, 2.0 / 9)