def update_liberal_dict():
	"""Rebuild the liberal word-count dictionary and its summary statistics."""
	global lib_dict
	global total_liberal_words
	global unique_liberal_words
	lib_dict = parser.build_dict(global_vars.liberal_file)
	total_liberal_words = sum(lib_dict.values())
	unique_liberal_words = len(lib_dict)
	return lib_dict
def update_conservative_dict():
	"""Rebuild the conservative word-count dictionary and its summary statistics."""
	global cons_dict
	global total_conservative_words
	global unique_conservative_words
	cons_dict = parser.build_dict(global_vars.conservative_file)
	total_conservative_words = sum(cons_dict.values())
	unique_conservative_words = len(cons_dict)
	return cons_dict
def k_fold_cross_valid_known(k, parsed, known, discounts):
    """k-fold cross-validation of the tagger for each discount value;
    the pre-computed `known` information is forwarded to _counter_known."""
    res = defaultdict(list)
    # The id mappings depend only on the full corpus, so build them once
    # instead of rebuilding them inside every fold/discount iteration.
    tag2id, word2id = build_dict(parsed)
    id2tag = {v: k for k, v in tag2id.iteritems()}
    id2word = {v: k for k, v in word2id.iteritems()}
    for train, test in _fold(parsed, k):
        for discount in discounts:
            print 'train:', len(train), 'test:', len(test)
            emission, transition = _counter_known(parsed, train, known,
                                                  0.85, tag2id, word2id, discount)

            count_ok, count_total = 0., 0.
            for i, seq in enumerate(test):
                out = viterbi(seq, transition, emission, word2id, tag2id)
                ok, total = _compare(seq[1:-1], id_to_token(out, id2word, id2tag))
                count_ok += ok; count_total += total
                if DEBUG:
                    print 'evaluated sentence', i, '- running accuracy:', count_ok/count_total
            res[discount].append(count_ok/count_total)
            print 'Fold accuracy:', res[discount][-1], 'discount:', discount
    for d in res:
        print 'discount:', d, '->', 'avg:', np.mean(res[d])
    return res
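

def viterbi(seq, transition, emission, word2id, tag2id):
    # NOTE: the original function header and lattice initialization were missing;
    # this is a reconstructed sketch. It assumes each sentence is a list of
    # (word, tag) tuples with boundary markers, that unseen words fall back to
    # id 0, and that the first column is scored from emissions alone -- these
    # are assumptions, not taken from the original code. tag2id is kept only
    # to match the existing call sites.
    seq = [word2id.get(w, 0) for w, _ in seq]          # map words to ids
    n_tags = len(transition)
    scores = np.full((n_tags, len(seq)), -np.inf)      # log-probability lattice
    backpointer = np.zeros((n_tags, len(seq)), dtype=int)
    scores[:, 0] = np.log(emission[:, seq[0]])         # assumed initialization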
    # Fill the lattice column by column: for each tag i at position j, keep the
    # best-scoring predecessor tag and remember it in the backpointer table.
    for j in xrange(1, len(seq)):
        for i in xrange(len(transition)):
            k_score = scores[:, j - 1] + np.log(transition[:, i]) + np.log(emission[i, seq[j]])
            backpointer[i, j] = np.argmax(k_score)
            scores[i, j] = k_score[backpointer[i, j]]

    # Backtrace: start from the best tag in the final column and follow the
    # backpointers to recover the full tag sequence.
    j = int(np.argmax(scores, axis=0)[-1])
    sol = [j]
    for i in xrange(len(seq) - 1, 0, -1):
        j = backpointer[j, i]
        sol.append(j)
    sol.reverse()
    return zip(seq[:-1], sol[:-1])


if __name__ == "__main__":
    path = "../WSJ-2-12/*/*.POS"
    docs = glob(path)

    # Parse the WSJ .POS files into tagged sentences.
    parsed = parse(docs)

    # Shuffle, prepare the sentences for the trigram model, and train on all
    # but the last 10 sentences.
    np.random.shuffle(parsed)
    parsed = trigramize(parsed)
    tag2id, word2id = build_dict(parsed[:-10])
    id2word = {v: k for k, v in word2id.iteritems()}
    id2tag = {v: k for k, v in tag2id.iteritems()}
    emission, transition = counter(parsed[:-10], tag2id, word2id)

    # Tag one held-out sentence; print the gold annotation and the model output.
    print "test POS", parsed[-1][1:-1]
    output = viterbi(parsed[-1], transition, emission, word2id, tag2id)
    print "TAGGED", id_to_token(output, id2word, id2tag)