def execute(treebank, dev):
    print "reading treebank..."
    parses = utils.read_parses_no_indent(treebank)
    parse_lists = []
    for parse in parses:
        parse_lists.append(utils.make_parse_list(parse))
      
    print "learning pcfg..."  
    nonterms, terms, start, prob = grammar.learn(parse_lists)
    
    print "learning hmm..."
    emission, transition = sequnece_labeler.learn(parse_lists)

    print "reading dev data..."
    dev_sentences = utils.get_sentences(dev)
    print dev_sentences[100] 
    for sentence in dev_sentences:
        parse = cky.run(sentence, nonterms, start, prob)
        sequnece = viterbi.run(sentence, emission, transition)
def quick_execute(dev):
    print "loading learnt parameters..."
    pcfg_prob, nonterms, start = cky.get_pcfg()
    hmm, tagset = viterbi.get_hmm_tagset()

    print "reading dev data..."
    parses = utils.read_parses_no_indent(dev)

    i = 0
    for parse in parses:
        if len(parse) > 100:
            parse_list = utils.make_parse_list(parse)
            sentence, truetags = utils.get_terminals_tags(parse_list)
            print '\n', sentence, '\n'
            #print dev_sentences.index(sentence)
            print "running dual decomposition..."
            num_iterations = dd_parser_tagger.run(sentence, pcfg_prob, nonterms, start, tagset, hmm)
            print "\n", truetags, " :true tags"
            if num_iterations != -1:
                print "converges in ", num_iterations ," iterations \n"
            else:
                print "does not converge :(\n"
        prev_tag, current_tag = trans.split("~>")
        counts.write(str(count)+ " 2-GRAM "+ prev_tag+ " "+ current_tag+ "\n")
    counts.close() 

def learn(parses):
    """Estimate HMM parameters (emission and transition counts, converted to
    probabilities in place) from a collection of treebank parses.

    parses -- iterable of raw parse strings

    Returns (emission_counts, transition_counts) after set_hmm_params has
    normalised them; also sanity-checks both tables as probability
    distributions and writes them to disk.
    """
    # NOTE(review): defaultdict() with no factory behaves like a plain dict
    # (missing keys raise KeyError) -- kept as-is since update_counts may
    # rely on that.
    emission_counts = defaultdict()
    transition_counts = defaultdict()
    tag_counts = defaultdict()

    for raw_parse in parses:
        tree = utils.make_parse_list(raw_parse)
        update_counts(tree, emission_counts, transition_counts, tag_counts)

    # Smoothing is deliberately skipped: with it, every -RARE- word gets the
    # FW tag and then all following tags are FW, because FW->-RARE- and
    # FW->FW end up with high probabilities.
    # emission_counts = smooth_emission(emission_counts, tag_counts)

    set_hmm_params(emission_counts, transition_counts, tag_counts)
    check_if_prob_dist(emission_counts)
    check_if_prob_dist(transition_counts)
    write_hmm_params(emission_counts, transition_counts, tag_counts)
    #write_for_java(emission_counts, transition_counts, tag_counts)
    return emission_counts, transition_counts

if __name__ == "__main__":
    # CLI entry point: train the HMM tagger on the treebank file given as
    # the first command-line argument.
    input_parses = utils.read_parses_no_indent(sys.argv[1])
    emission, transition = learn(input_parses)