示例#1
0
                elif word.istitle():
                    word = "_CF_"
                elif all(c in string.punctuation or c.isdigit() for c in word):
                    word = "_NP_"
                else:
                    word = "_RARE_"

            # Iterate over u and v
            for u in K[k - 1]:
                for v in K[k]:

                    # Find max over w in K[k-2]
                    w_candidates = defaultdict(float)

                    for w in K[k - 2]:
                        w_candidates[w] = pi[k - 1][(w, u)] * counter.calc_mle(
                            [w, u, v]) * counter.calc_emissions(word, v)

                    final_w = max(w_candidates.iteritems(),
                                  key=operator.itemgetter(1))

                    # Assign pi value
                    pi[k][(u, v)] = final_w[1]

            # Get the (tag, probability) of v in max(pi[k](u,v))
            final_k_idx = max(pi[k].iteritems(), key=operator.itemgetter(1))

            prob = final_k_idx[1]
            # Log probability
            log_prob = math.log(prob)
            # Output format: word, tag, log probability
            sys.stdout.write("%s %s %s\n" %
        sys.exit(2)

    # Open both command-line inputs up front so that an unreadable path is
    # reported (with the offending file name) before any work is done.
    #   sys.argv[1] -- counts file, handed to Hmm.read_counts
    #   sys.argv[2] -- file with one space-separated trigram per line
    try:
        counts_file = open(sys.argv[1], "r")
    except IOError:
        sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % sys.argv[1])
        sys.exit(1)

    trigram_file = sys.argv[2]
    try:
        test_file = open(trigram_file, "r")
    except IOError:
        counts_file.close()
        sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % trigram_file)
        sys.exit(1)

    # Initialize a trigram counter
    counter = Hmm(3)
    # Read in counts; the handle is not needed once they are loaded.
    try:
        counter.read_counts(counts_file)
    finally:
        counts_file.close()

    # Iterate through the trigrams in trigram_file and write the log of the
    # value counter.calc_mle returns for each one (presumably its
    # maximum-likelihood probability estimate -- confirm against Hmm).
    try:
        for line in test_file:
            stripped = line.strip()

            if not stripped:
                # Blank input line: echo a blank output line. The check is
                # done on the stripped text because "".split(" ") yields
                # [""], which is truthy and would otherwise be scored.
                sys.stdout.write("\n")
                continue

            trigram = stripped.split(" ")
            prob = counter.calc_mle(trigram)

            # Get the log of the probability.
            # NOTE(review): math.log raises ValueError for prob == 0;
            # confirm whether calc_mle can return 0 for unseen trigrams.
            log_prob = math.log(prob)

            # Output format: the three tokens followed by the log probability
            sys.stdout.write(
                "%s %s %s %s\n" %
                (trigram[0], trigram[1], trigram[2], str(log_prob)))
    finally:
        test_file.close()