Пример #1
0
            # First write the header line
            # Read the file through a context manager so the handle is closed
            # deterministically, keeping only stripped lines longer than one
            # character.
            with open(os.path.join(dirpath, name)) as source_file:
                text = [line.strip() for line in source_file if len(line.strip()) > 1]

            # Join once rather than once per test; grade_text is then called for
            # every entry of grade_utils.cols and each result is coerced to int.
            joined_text = "\n".join(text)
            row = [int(grade_utils.grade_text(joined_text, test)) for test in grade_utils.cols]

            # Final column: every score once (sum), row[3] a second time and
            # row[5] two further times, divided by 10 and passed to round_to with
            # 0.5 (presumably the rounding step -- confirm against round_to).
            row.append(round_to(float(sum(row) + row[3] + (row[5] * 2)) / 10, 0.5))
            new_line = ",".join([str(v) for v in row])
            output.append(new_line)
    f = open('output.txt', 'w')
    file_contents = "\n".join(output)
    f.write(file_contents)
    f.close()
    print "Finished writing %d scores to output.txt" % (len(output) - 1,)

elif score_stdin or parse_stdin:
    import tree_utils
    trees = parsers.parse(cmd_utils.get_stdin())
    for tree in trees:
        print tree
        if score_stdin:
            sentence_transitions = tree_utils.transitions_in_tree(tree)
            sentence_probs = []
            for transition in sentence_transitions:
                print "Transitions: %s" % (transition)
                probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                print "Probs: %s" % (probs)
                sentence_probs += probs
            total = 1
            for prob in sentence_probs:
                total *= prob
            print "Total: %f" % (total,)
elif sentence_parse_stdin:
Пример #2
0
                for test in grade_utils.cols
            ]

            # Final column: every score once (sum), row[3] a second time and
            # row[5] two further times, divided by 10 and passed to round_to with
            # 0.5 (presumably the rounding step -- confirm against round_to).
            row.append(
                round_to(float(sum(row) + row[3] + (row[5] * 2)) / 10, 0.5))
            # Serialize the row as one comma-separated line and queue it for
            # the output file.
            new_line = ",".join([str(v) for v in row])
            output.append(new_line)
    f = open('output.txt', 'w')
    file_contents = "\n".join(output)
    f.write(file_contents)
    f.close()
    print "Finished writing %d scores to output.txt" % (len(output) - 1, )

elif score_stdin or parse_stdin:
    import tree_utils
    trees = parsers.parse(cmd_utils.get_stdin())
    for tree in trees:
        print tree
        if score_stdin:
            sentence_transitions = tree_utils.transitions_in_tree(tree)
            sentence_probs = []
            for transition in sentence_transitions:
                print "Transitions: %s" % (transition)
                probs = hmm_utils.prob_of_all_transitions(transition,
                                                          counts,
                                                          gram_size=3)
                print "Probs: %s" % (probs)
                sentence_probs += probs
            total = 1
            for prob in sentence_probs:
                total *= prob
Пример #3
0
    parse_for_max_prob = all_possible_sentences[all_possible_sentence_probs.index(max_prob)]
    log("All Probs: %s" % (all_possible_sentence_probs,), 2)
    log("MAX Prob: %f" % (max_prob,), 2)
    log("Parse for max prob: %s" % (parse_for_max_prob,), 2)
    log("Best Guess Num Sentences: %d" % (len(parse_for_max_prob),), 1)
    log("-------------\n\n", 1)

    if use_cache:
        cache_set("sentence_tokenizer", line, (parse_for_max_prob, max_prob))
    return (parse_for_max_prob, max_prob) if include_prob else parse_for_max_prob


if __name__ == '__main__':
    ## Simple method for testing from STDIN
    if use_stdin:
        print parse_sentences(cmd_utils.get_stdin())
    else:
        essays = essay_utils.essays if essay_index == -1 else [essay_utils.essays[essay_index]]

        essays_in_corpus = []

        for essay in essays:

            sentences_for_essay = []
            lines = essay if line_index == -1 else [essay[line_index]]

            for line in lines:
                sentences_for_essay.append(len(parse_sentences(line)))
            log("Sentence counts for essay: %s" % (sentences_for_essay,))
            essays_in_corpus.append(sentences_for_essay)
Пример #4
0
    log("All Probs: %s" % (all_possible_sentence_probs, ), 2)
    log("MAX Prob: %f" % (max_prob, ), 2)
    log("Parse for max prob: %s" % (parse_for_max_prob, ), 2)
    log("Best Guess Num Sentences: %d" % (len(parse_for_max_prob), ), 1)
    log("-------------\n\n", 1)

    if use_cache:
        cache_set("sentence_tokenizer", line, (parse_for_max_prob, max_prob))
    return (parse_for_max_prob,
            max_prob) if include_prob else parse_for_max_prob


if __name__ == '__main__':
    ## Simple method for testing from STDIN
    if use_stdin:
        print parse_sentences(cmd_utils.get_stdin())
    else:
        essays = essay_utils.essays if essay_index == -1 else [
            essay_utils.essays[essay_index]
        ]

        essays_in_corpus = []

        for essay in essays:

            sentences_for_essay = []
            lines = essay if line_index == -1 else [essay[line_index]]

            for line in lines:
                sentences_for_essay.append(len(parse_sentences(line)))
            log("Sentence counts for essay: %s" % (sentences_for_essay, ))