import os
import pickle

import nltk

import cmd_utils
import tree_utils


def get_transition_counts():
    """Return tag-transition counts for the Penn Treebank, building and
    caching them on first use."""
    file_name = 'penn_transition_counts.data'
    try:
        # Fast path: reuse counts pickled by a previous run.
        f = open(os.path.join('cache', file_name), 'rb')
        data = pickle.load(f)
        f.close()
        return data
    except (IOError, EOFError):
        cmd_utils.log("Building counts from Penn Treebank corpus", 1)
        f = open(os.path.join('cache', file_name), 'wb')
        for sentence in nltk.corpus.treebank.parsed_sents():
            all_transitions = tree_utils.transitions_in_tree(sentence)
            for transitions in all_transitions:
                # Prepend a synthetic START tag so the first real tag also
                # contributes a transition.
                transitions = ['START'] + transitions
                if len(transitions) > 1:
                    store_transitions(transitions)
        cmd_utils.log("Finished building tag counts", 1)
        pickle.dump(store_transitions._counts, f)
        f.close()
        return store_transitions._counts
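# `get_transition_counts` relies on a `store_transitions` helper that is not
# shown in this excerpt; only its `_counts` function attribute is visible
# above. The sketch below is one plausible shape for it, assuming it tallies
# tag n-grams (the gram_size=3 calls elsewhere suggest counts up to trigrams);
# the project's real helper may differ.
def store_transitions(transitions, max_gram_size=3):
    """Accumulate counts for every 1..max_gram_size tag n-gram in the given
    tag sequence, stored on a function attribute so totals persist across calls."""
    for gram_size in range(1, max_gram_size + 1):
        for i in range(len(transitions) - gram_size + 1):
            gram = tuple(transitions[i:i + gram_size])
            store_transitions._counts[gram] = store_transitions._counts.get(gram, 0) + 1
store_transitions._counts = {}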
        # Combined score: the category scores summed, with row[3] counted
        # twice and row[5] three times, divided by 10 and rounded to the
        # nearest 0.5.
        row.append(round_to(float(sum(row) + row[3] + (row[5] * 2)) / 10, 0.5))
        new_line = ",".join([str(v) for v in row])
        output.append(new_line)
    # Write the collected rows to output.txt, one comma-separated line each.
    f = open('output.txt', 'w')
    file_contents = "\n".join(output)
    f.write(file_contents)
    f.close()
    print "Finished writing %d scores to output.txt" % (len(output) - 1,)
elif score_stdin or parse_stdin:
    import tree_utils
    trees = parsers.parse(cmd_utils.get_stdin())
    for tree in trees:
        print tree
        if score_stdin:
            # Score the parse by multiplying together the probability of
            # every tag transition found in the tree.
            sentence_transitions = tree_utils.transitions_in_tree(tree)
            sentence_probs = []
            for transition in sentence_transitions:
                print "Transitions: %s" % (transition,)
                probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                print "Probs: %s" % (probs,)
                sentence_probs += probs
            total = 1
            for prob in sentence_probs:
                total *= prob
            print "Total: %f" % (total,)
elif sentence_parse_stdin:
    import sentence_tokenizer
    sentences = sentence_tokenizer.parse_sentences(cmd_utils.get_stdin(), use_cache=False)
    print sentences
elif word_order_parse_stdin:
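# The scoring branches above treat `hmm_utils.prob_of_all_transitions` as a
# black box. That module is not part of this excerpt; the sketch below shows
# one plausible reading of it (maximum-likelihood n-gram probabilities over
# the pickled counts) and is an assumption, not the project's actual code.
def prob_of_all_transitions(transitions, counts, gram_size=3):
    """Return P(tag | previous gram_size - 1 tags) for each position in the
    tag sequence, estimated as count(full gram) / count(history)."""
    probs = []
    for i in range(len(transitions) - gram_size + 1):
        gram = tuple(transitions[i:i + gram_size])
        history = gram[:-1]
        # An unseen history raises KeyError, which callers treat as an
        # "impossible" tag order.
        probs.append(float(counts.get(gram, 0)) / counts[history])
    return probs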
def parse_sentences(line, use_cache=True, include_prob=False):
    log("Working on: %s" % (line,), 2)

    if use_cache:
        correct_parse = cache_get("sentence_tokenizer", line)
        if correct_parse:
            log("Cache Hit: %s" % (correct_parse[0],), 4)
            log("-------------\n", 4)
            return correct_parse if include_prob else correct_parse[0]

    all_possible_sentences = _possible_sentences_in_line(line)
    all_possible_sentence_probs = []
    invalid_possible_sentences = []
    stored_probs = {}

    for possible_sentences in all_possible_sentences:
        log("Examining: %s" % (possible_sentences,), 1)
        prob_for_sentences = []
        sent_is_impossible = False
        for possible_sentence in possible_sentences:
            if use_cache:
                possible_sentence_prob = cache_get('possible_sentences', possible_sentence)
                if possible_sentence_prob is not None:
                    log("Cache Hit: %s (from %s)" % (possible_sentence, 'possible sentences'), 4)
                    prob_for_sentences.append(possible_sentence_prob)
                    continue
            # Skip any candidate that includes a sub-sentence already known to
            # be unparseable, or whose grouping has already been ruled out.
            if contains_any_invalid_setences(possible_sentences, invalid_possible_sentences) or sent_is_impossible:
                prob_for_sentences.append(0)
                continue
            elif possible_sentence in stored_probs:
                prob_for_sentences.append(stored_probs[possible_sentence])
                continue

            sentence_trees = parsers.parse(possible_sentence)
            if len(sentence_trees) == 0:
                log("Wasn't able to parse input %s" % (possible_sentence,), 0)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                continue
            else:
                sentence_tree = sentence_trees[0]

            if cmd_log_level() >= 4:
                print "--------"
                print "Pre Simplified Tree"
                print sentence_tree

            tree_utils.simplify_tree(sentence_tree,
                                     remove_starting_cc=possible_sentences.index(possible_sentence) == 0)

            if cmd_log_level() >= 4:
                print "--------"
                print "Post Simplified Tree"
                print sentence_tree

            sentence_transitions = tree_utils.transitions_in_tree(sentence_tree)

            if not is_possible_sentence(sentence_tree):
                log("%s" % (sentence_transitions,), 2)
                log("Invalid parse", 2)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                if use_cache:
                    cache_set('possible_sentences', possible_sentence, 0)
            else:
                log("%s" % (sentence_transitions,), 2)
                sentence_probs = []
                for transition in sentence_transitions:
                    try:
                        probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                    except KeyError, e:
                        # A transition never seen in the training counts makes
                        # the whole parse impossible.
                        log("'Impossible' tag order", 2, sep=' ** ')
                        log("%s" % (e,), 2, sep=' ** ')
                        probs = [0]
                    sentence_probs += probs
                    log("Transitions: %s" % (transition,), 3)
                    log("Probabilities: %s" % (probs,), 3)

                attempt_sentence_prob = prod(sentence_probs)
                sentence_prob_boost = boost_for_sentence_tree(sentence_tree)
                attempt_sentence_prob *= sentence_prob_boost

                prob_for_sentences.append(attempt_sentence_prob)
                stored_probs[possible_sentence] = attempt_sentence_prob
                if use_cache:
                    cache_set('possible_sentences', possible_sentence, attempt_sentence_prob)

        # Weight the joint probability by `weight` once for every extra
        # sentence in this split of the line.
        weighted_score = prod(prob_for_sentences) * (weight ** (len(possible_sentences) - 1))
        if weighted_score > 0:
            log("Valid Parse: %s" % (possible_sentences,), 2)
            log(weighted_score, 2)
            all_possible_sentence_probs.append(weighted_score)
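# `prod` and `weight` above come from elsewhere in the module. A minimal
# stand-in for `prod`, assuming it simply multiplies a sequence of
# probabilities together (Python 2, where `reduce` is a builtin):
def prod(values):
    return reduce(lambda total, value: total * value, values, 1)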