示例#1
0
def main(args):
    """Parse each test sentence with CKY and print its most probable parse.

    The grammar is loaded with ``nltk.data.load`` from
    ``args.input_PCFG_file``. Every line of ``args.test_sentence_filename``
    is tokenized, turned into a CKY table, and parsed. The parse with the
    highest ``prob()`` is printed on a single line; a blank line is printed
    for a sentence that yields no parse.

    Args:
        args: Object exposing ``input_PCFG_file``, ``test_sentence_filename``
            and the boolean-like flag ``improved`` (selects the
            ``*_improved`` table builder and parser).
    """
    grammar = nltk.data.load(args.input_PCFG_file)

    # Running totals; they are only accumulated in this portion of the
    # function and are kept for any reporting code that follows it.
    tot_sentences = 0
    tot_parses = 0

    with open(args.test_sentence_filename) as f:
        for line in f:
            token_list = nltk.word_tokenize(line.strip())

            if args.improved:
                table = cky_build_table_improved(token_list, grammar)
            else:
                table = cky_build_table(token_list, grammar)

            # Only the parse extraction is expected to signal "no parse".
            try:
                if args.improved:
                    # The improved parser returns a single tree; wrap it so
                    # both code paths yield a list of candidates.
                    parses = [parse_table_improved(grammar, table)]
                else:
                    parses = parse_table_orig(grammar, table)
            except NoParsesException:
                parses = []

            if parses:
                # max() keeps the first of equally probable parses, like the
                # strict ">" comparison it replaces, but does not depend on a
                # -1 sentinel that would leave the result unset for scores
                # that are never greater than -1.
                best_parse = max(parses, key=lambda p: p.prob())
                # Tree.__str__ is called explicitly so only the bracketed
                # tree is printed, without the probability annotation.
                print(Tree.__str__(best_parse).replace("\n", " "))
                tot_parses += len(parses)
            else:
                # An empty candidate list is treated like NoParsesException
                # instead of crashing on Tree.__str__(None).
                print("")  # Print blank

            tot_sentences += 1
示例#2
0
def fix_multi_sent(line):
    """Return a one-line bracketed tree, wrapping multi-child roots.

    ``line`` is parsed with ``Tree.fromstring``. When the root has more than
    one child, those children are regrouped under a new ``S`` node which is
    itself placed under a new ``S1`` root; otherwise the tree is used as is.

    Args:
        line: Bracketed tree string accepted by ``Tree.fromstring``.

    Returns:
        The string form of the resulting tree with every newline and the
        whitespace following it replaced by a single space.
    """
    tree = Tree.fromstring(line)
    if len(tree) > 1:
        # tree[:] is the list of the root's children.
        newtree = Tree('S1', [Tree('S', tree[:])])
    else:
        newtree = tree
    # Raw string: '\s' in a normal literal is an invalid escape sequence
    # (a warning on current Python versions). The regex matches the same text.
    return re.sub(r'\n\s*', ' ', str(newtree))