import hmm_utils
import cmd_utils
import parsers


def round_to(n, precission):
    """Round ``n`` to the nearest multiple of ``precission``.

    For a positive ``precission`` ties are rounded away from zero, because
    half a step is added (or subtracted when ``n`` is negative) before the
    quotient is truncated with ``int``.

    Args:
        n: The number to round.
        precission: The step size whose multiples are the allowed results.

    Returns:
        ``precission`` multiplied by the rounded integer number of steps.

    Raises:
        ZeroDivisionError: If ``precission`` is zero.
    """
    correction = 0.5 if n >= 0 else -0.5
    # Force true division: with two ints, classic "/" floors the quotient
    # before the correction is added, which sends e.g. round_to(5, 3) to 3
    # instead of 6.
    return int(float(n) / precission + correction) * precission


counts = hmm_utils.get_transition_counts()

# Command-line switches noting that input should be read from STDIN instead
# of from a test essay file
grade_directory = cmd_utils.cmd_arg('--dir', None)
final_score_stdin = cmd_utils.cmd_flag('--final-score', None)
parse_stdin = cmd_utils.cmd_flag('--parse', None)
score_stdin = cmd_utils.cmd_flag('--score', None)
pronoun_stdin = cmd_utils.cmd_flag('--pronoun', None)
topic_stdin = cmd_utils.cmd_flag('--topic', None)
syntactic_formation_stdin = cmd_utils.cmd_flag('--syn-formation', None)
agreement_stdin = cmd_utils.cmd_flag('--agree', None)
sentence_parse_stdin = cmd_utils.cmd_flag('--sen-token', None)
word_order_parse_stdin = cmd_utils.cmd_flag('--word-order', None)
transition_count = cmd_utils.cmd_arg('--count', None)
transition_prob = cmd_utils.cmd_arg('--prob', None)
(2, 1, 2, 1,), (1, 1, 1, 1, 1, 1, 1, 1, 1, 1,), # 10 (2, 2, 2, 1,), (1, 2, 1, 2,), (3, 3, 2, 2,), (2, 3,), (2, 2, 1,), # 15 (1, 1, 1, 3, 1), (2, 1,), (2, 2, 1,), (2, 3,), (2, 2, 2, 2), # 20 ) counts = hmm_utils.get_transition_counts() essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1 line_index = int(cmd_utils.cmd_arg('--line', -1)) use_stdin = cmd_utils.cmd_flag('--stdin') # How much to prefer long answers over shorter onces weight = .0001 invalid_boundary_tags = ('IN', 'CC', 'SINV', 'RP', 'TO') pers_pro_tags = ('PRP', 'PRP$') start_pers_pro_weight = 1000 def _possible_sentences_in_line(line, min_sentence_len=3): # The simplest thing here is to defer to the paper. If it looks like they've # added punctuation already, lets just use that
def grade_3a(text): sentences = sentence_tokenizer.parse(text) num_sentences = len(sentences) if num_sentences >= 6: return 5 else: return max(num_sentences - 1, 1) if __name__ == '__main__': import cmd_utils tests = cmd_utils.cmd_test() tests = [tests] if tests else ('1a', '1b', '1d', '2a', '2b', '3a') essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1 for test in tests: if essay_index >= 0: essay_text = "\n".join(essay_utils.essays[essay_index]) received_grade = grade_text(essay_text, test) log("Expect %s score: %d" % (test, correct_essay_grade(essay_index, test)), 0) log("Received %s score: %d" % (test, received_grade), 0) else: print "Values for %s" % (test,) print "-------------" for i in range(0, len(essay_utils.essays)): essay_text = "\n".join(essay_utils.essays[i]) received_grade = grade_text(essay_text, test) expected_grade = correct_essay_grade(i, test) diff = received_grade - expected_grade
try: rs = mem_caches[cache_name][cache_key] log('Cache Hit: %s[%s]' % (cache_name, cache_key), 5) return rs except KeyError: return None def cache_set(cache_name, cache_key, cache_value): if cache_name not in mem_caches: cache_get(cache_name, cache_key) mem_caches[cache_name][cache_key] = cache_value _write_cache(cache_name) def _write_cache(cache_name): file_name = cache_name + '.data' f_write = open(os.path.join('cache', file_name), 'wb') pickle.dump(mem_caches[cache_name], f_write) f_write.close() if __name__ == "__main__": from cmd_utils import cmd_arg cache_key = cmd_arg('--key', None) cache_name = cmd_arg('--name', None) if cache_key and cache_name: cache_del(cache_name, cache_key)
import hmm_utils
import cmd_utils
import parsers


def round_to(n, precission):
    """Round ``n`` to the nearest multiple of ``precission``.

    For a positive ``precission`` ties are rounded away from zero, because
    half a step is added (or subtracted when ``n`` is negative) before the
    quotient is truncated with ``int``.

    Args:
        n: The number to round.
        precission: The step size whose multiples are the allowed results.

    Returns:
        ``precission`` multiplied by the rounded integer number of steps.

    Raises:
        ZeroDivisionError: If ``precission`` is zero.
    """
    correction = 0.5 if n >= 0 else -0.5
    # Force true division: with two ints, classic "/" floors the quotient
    # before the correction is added, which sends e.g. round_to(5, 3) to 3
    # instead of 6.
    return int(float(n) / precission + correction) * precission


counts = hmm_utils.get_transition_counts()

# Command-line switches noting that input should be read from STDIN instead
# of from a test essay file
grade_directory = cmd_utils.cmd_arg('--dir', None)
final_score_stdin = cmd_utils.cmd_flag('--final-score', None)
parse_stdin = cmd_utils.cmd_flag('--parse', None)
score_stdin = cmd_utils.cmd_flag('--score', None)
pronoun_stdin = cmd_utils.cmd_flag('--pronoun', None)
topic_stdin = cmd_utils.cmd_flag('--topic', None)
syntactic_formation_stdin = cmd_utils.cmd_flag('--syn-formation', None)
agreement_stdin = cmd_utils.cmd_flag('--agree', None)
sentence_parse_stdin = cmd_utils.cmd_flag('--sen-token', None)
word_order_parse_stdin = cmd_utils.cmd_flag('--word-order', None)
transition_count = cmd_utils.cmd_arg('--count', None)
transition_prob = cmd_utils.cmd_arg('--prob', None)

if grade_directory:
    import os
1, ), ( 2, 2, 1, ), ( 2, 3, ), (2, 2, 2, 2), # 20 ) counts = hmm_utils.get_transition_counts() essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1 line_index = int(cmd_utils.cmd_arg('--line', -1)) use_stdin = cmd_utils.cmd_flag('--stdin') # How much to prefer long answers over shorter onces weight = .0001 invalid_boundary_tags = ('IN', 'CC', 'SINV', 'RP', 'TO') pers_pro_tags = ('PRP', 'PRP$') start_pers_pro_weight = 1000 def _possible_sentences_in_line(line, min_sentence_len=3): # The simplest thing here is to defer to the paper. If it looks like they've # added punctuation already, lets just use that has_abbr = sum([
def grade_3a(text): sentences = sentence_tokenizer.parse(text) num_sentences = len(sentences) if num_sentences >= 6: return 5 else: return max(num_sentences - 1, 1) if __name__ == '__main__': import cmd_utils tests = cmd_utils.cmd_test() tests = [tests] if tests else ('1a', '1b', '1d', '2a', '2b', '3a') essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1 for test in tests: if essay_index >= 0: essay_text = "\n".join(essay_utils.essays[essay_index]) received_grade = grade_text(essay_text, test) log( "Expect %s score: %d" % (test, correct_essay_grade(essay_index, test)), 0) log("Received %s score: %d" % (test, received_grade), 0) else: print "Values for %s" % (test, ) print "-------------" for i in range(0, len(essay_utils.essays)): essay_text = "\n".join(essay_utils.essays[i]) received_grade = grade_text(essay_text, test)