Example #1
File: util.py Project: gijs/nltk
def batch_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.
    
    :param inputs: sentences to be parsed
    :type inputs: list of str
    :param grammar: L{FeatureGrammar} or name of feature-based grammar
    :rtype: list(list(Tree))
    :return: for each input sentence, a list of parse L{Tree}s
    """

    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split() # use a tokenizer?
        syntrees = cp.nbest_parse(tokens)  # NLTK 2.x API; NLTK 3 replaced nbest_parse() with parse()
        parses.append(syntrees)
    return parses
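A minimal usage sketch for this older variant (hypothetical, not from the gijs/nltk project): it assumes an NLTK 2.x install, where ChartParser.nbest_parse still exists, plus the sample feature grammar grammars/book_grammars/feat0.fcfg shipped with nltk_data; the sentences are picked from that grammar's small lexicon.

# Hypothetical usage; assumes NLTK 2.x (nbest_parse) and the nltk_data sample
# grammar 'grammars/book_grammars/feat0.fcfg'.
sentences = ['Kim likes dogs', 'these dogs walk']

# Passing a grammar name (not a FeatureGrammar object) takes the load_parser() branch.
results = batch_parse(sentences, 'grammars/book_grammars/feat0.fcfg', trace=0)

for sent, trees in zip(sentences, results):
    print('%s -> %d parse(s)' % (sent, len(trees)))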
Example #2
def batch_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list of str
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :rtype: list(list(Tree))
    :return: for each input sentence, a list of parse ``Tree``s
    """

    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split() # use a tokenizer?
        syntrees = cp.nbest_parse(tokens)  # NLTK 2.x API; NLTK 3 replaced nbest_parse() with parse()
        parses.append(syntrees)
    return parses
Example #3
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(list(nltk.tree.Tree))
    :return: for each input sentence, a list of parse ``Tree``s
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split()  # use a tokenizer?
        syntrees = list(cp.parse(tokens))
        parses.append(syntrees)
    return parses
Example #4
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(list(nltk.tree.Tree))
    :return: for each input sentence, a list of parse ``Tree`` instances.
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split()  # use a tokenizer?
        syntrees = list(cp.parse(tokens))
        parses.append(syntrees)
    return parses
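For completeness, a usage sketch (hypothetical, not part of NLTK) that exercises both branches of the isinstance check in the parse_sents above. It assumes NLTK 3.x and the sample grammar grammars/book_grammars/feat0.fcfg from nltk_data; the sentences are drawn from that grammar's lexicon.

# Hypothetical usage; assumes NLTK 3.x and the nltk_data sample grammar
# 'grammars/book_grammars/feat0.fcfg'.
import nltk

sentences = ['Kim likes dogs', 'these dogs walk']

# Branch 1: pass the grammar by name; parse_sents() calls load_parser() internally.
by_name = parse_sents(sentences, 'grammars/book_grammars/feat0.fcfg')

# Branch 2: pass a FeatureGrammar object; parse_sents() wraps it in a FeatureChartParser.
grammar = nltk.data.load('grammars/book_grammars/feat0.fcfg')
by_object = parse_sents(sentences, grammar)

for sent, trees in zip(sentences, by_object):
    print(sent, '->', len(trees), 'parse(s)')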
Example #5
    use_local_file = False
    if use_local_file:
        if 'hw6' in os.listdir():
            os.chdir('hw6')
        input_pcfg_filename = 'grammar.fcfg'
        sentences_filename = 'sentences.txt'
        output_parses_filename = "hw6_output.txt"
    else:
        input_pcfg_filename = sys.argv[1]
        sentences_filename = sys.argv[2]
        output_parses_filename = sys.argv[3]

    gr = nltk.data.load(input_pcfg_filename,
                        format='fcfg')  # read fcfg grammar

    parser = FeatureChartParser(gr)  # Initialize parser

    output_file = open(output_parses_filename, 'w')

    # Parse sentences
    with open(sentences_filename, 'r') as sent_file:
        line = sent_file.readline().strip('\n')
        while line:
            output_file.write(line + '\n')
            words = nltk.word_tokenize(line)
            try:
                parsed = parser.parse(words)
                tree = parsed.__iter__()
                out_tree = ''
                for tree in parsed:
                    out_tree = str(tree.label()['SEM'].simplify())