Exemplo n.º 1
0
def dependency_grammar():
    """Parse the classic Groucho Marx sentence with a toy dependency grammar.

    Builds a dependency grammar for "I shot an elephant in my pajamas",
    prints the grammar, then prints every projective parse of the sentence.
    Returns nothing; output goes to stdout.
    """
    # nltk.parse_dependency_grammar() was removed in NLTK 3; use the
    # DependencyGrammar.fromstring API that the rest of this file uses.
    groucho_dep_grammar = nltk.DependencyGrammar.fromstring("""
    'shot' -> 'I' | 'elephant' | 'in'
    'elephant' -> 'an' | 'in'
    'in' -> 'pajamas'
    'pajamas' -> 'my'
  """)
    # Python 3 print calls (the original py2 print statements would be
    # syntax errors under the interpreter the rest of this file targets).
    print(groucho_dep_grammar)
    pdp = nltk.ProjectiveDependencyParser(groucho_dep_grammar)
    sent = "I shot an elephant in my pajamas".split()
    trees = pdp.parse(sent)
    for tree in trees:
        #    tree.draw()
        print(tree)
Exemplo n.º 2
0
def dependency_graph():
    """Flask view: on POST, parse a fixed example sentence with an NLTK
    projective dependency parser and render the parse trees.

    Renders ``dependency_graph.html`` with any collected errors, the list
    of parse trees, the rule dict and the node list (both currently empty).
    """
    errors = []
    results = []
    sentence = ""
    rules = {}
    nodes = []
    if request.method == "POST":
        # Pull the sentence the user submitted, if any.
        try:
            # Membership test on the form directly; .keys() was redundant.
            if 'sentence' in request.form:
                sentence = request.form['sentence']
        except BaseException as e:
            errors.append(
                "Unable to get Sentence. Please make sure it's valid and try again."
            )
            errors.append(e)
            # check the form with: errors.append(request.form)
        # Local folder holding the nltk data files.
        nltk.data.path.append('./nltk_data/')
        # Grammar rules, kept as printable strings for the TREE display.
        # (Unnecessary backslash escapes removed; the string values are
        # unchanged.)
        grammarPrint = [
            "'fell' -> 'price' | 'stock'",
            "'price' -> 'of' 'the'",
            "'of' -> 'stock'",
            "'stock' -> 'the'",
        ]
        # Compile the printable rules into an NLTK dependency grammar.
        grammar_rules = nltk.DependencyGrammar.fromstring(
            "\n".join(grammarPrint))
        dp = nltk.ProjectiveDependencyParser(grammar_rules)

        for t in sorted(
                dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
            results.append(t)

        # rules = grammarPrint
        # NOTE(review): projective() below does expose the rules; left
        # commented here to preserve this view's existing template output.

    return render_template(
        'dependency_graph.html',
        errors=errors,
        results=results,
        rules=rules,
        nodes=nodes
    )  #CREDIT: http://code.runnable.com/UiPcaBXaxGNYAAAL/how-to-upload-a-file-to-the-server-in-flask-for-python
Exemplo n.º 3
0
def projective():
    """Flask view: on POST, parse a fixed example sentence with an NLTK
    projective dependency parser and render the parse trees.

    Renders ``projective_tree_version.html`` with any collected errors,
    the parse trees, the printable grammar rules, and the (empty) node
    list.
    """
    errors = []
    results = []
    sentence = ""
    rules = {}
    nodes = []
    if request.method == "POST":
        # Pull the sentence the user submitted, if any.
        try:
            # Membership test on the form directly; .keys() was redundant.
            if 'sentence' in request.form:
                sentence = request.form['sentence']
        except BaseException as e:
            errors.append(
                "Unable to get Sentence. Please make sure it's valid and try again."
            )
            errors.append(e)
            # check the form with: errors.append(request.form)

        # Local folder holding the nltk data files.
        nltk.data.path.append('./nltk_data/')
        # Grammar rules, kept as printable strings for the template.
        # (Unnecessary backslash escapes removed; the string values are
        # unchanged.)
        grammarPrint = [
            "'fell' -> 'price' | 'stock'",
            "'price' -> 'of' 'the'",
            "'of' -> 'stock'",
            "'stock' -> 'the'",
        ]
        grammar = nltk.DependencyGrammar.fromstring("\n".join(grammarPrint))
        dp = nltk.ProjectiveDependencyParser(grammar)
        for t in sorted(
                dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
            results.append(t)

        rules = grammarPrint

    return render_template('projective_tree_version.html',
                           errors=errors,
                           results=results,
                           rules=rules,
                           nodes=nodes)
Exemplo n.º 4
0
  V -> "saw" | "ate" | "walked" | "shot" | "book"
  NP -> Prop | Det N | Det N PP
  Prop -> "John" | "Mary" | "Bob" | "I"
  Det -> "a" | "an" | "the" | "my" | "that"
  N -> "man" | "dog" | "cat" | "telescope" | "park" | "elephant" | "pajamas" | "flight"
  P -> "in" | "on" | "by" | "with"
  """)

# make a recursive descent parser and parse the sentence
# (flight_grammar is the CFG built just above; RecursiveDescentParser does
# top-down backtracking parsing.)
rd_parser = nltk.RecursiveDescentParser(flight_grammar)
sent5list = 'book that flight'.split()
for tree in rd_parser.parse(sent5list):
    print(tree)

## (Optional) Look at Dependency grammars in the NLTK book, section 8.5
# a dependency grammar for the groucho example
# with dependency grammar, it actually focuses on the syntactic relations in the sentence
# Each rule reads head -> dependents, e.g. 'shot' governs 'I', 'elephant',
# and 'in'.
groucho_dep_grammar = nltk.DependencyGrammar.fromstring("""
  'shot' -> 'I' | 'elephant' | 'in'
  'elephant' -> 'an' | 'in'
  'in' -> 'pajamas'
  'pajamas' -> 'my'
  """)

print(groucho_dep_grammar)
# Projective parsing: dependency arcs may not cross; the ambiguous PP
# attachment yields more than one tree for this sentence.
pdp = nltk.ProjectiveDependencyParser(groucho_dep_grammar)
glist = 'I shot an elephant in my pajamas'.split()
trees = pdp.parse(glist)
for tree in trees:
    print(tree)
Exemplo n.º 5
0
# Parse `sentence` with the Stanford dependency parser (sdp is set up
# earlier in the file) and materialize the lazy parse iterator.
result = list(sdp.raw_parse(sentence))

# Notebook residue: bare expressions that displayed the first parse and
# its (head, relation, dependent) triples when run in a REPL cell.
result[0]

[item for item in result[0].triples()]

# Convert each dependency parse to an nltk Tree and show the first one.
dep_tree = [parse.tree() for parse in result][0]
# Python 3 print calls (the original py2 print statements would be syntax
# errors under the interpreter the rest of this file targets).
print(dep_tree)
dep_tree.draw()

import nltk
tokens = nltk.word_tokenize(sentence)

# Hand-written head -> dependents rules for the same sentence.
dependency_rules = """
'fox' -> 'The' | 'brown'
'quick' -> 'fox' | 'is' | 'and' | 'jumping'
'jumping' -> 'he' | 'is' | 'dog'
'dog' -> 'over' | 'the' | 'lazy'
"""

dependency_grammar = nltk.grammar.DependencyGrammar.fromstring(
    dependency_rules)
print(dependency_grammar)

# Parse the tokenized sentence against the hand-written grammar and draw
# the first resulting tree.
dp = nltk.ProjectiveDependencyParser(dependency_grammar)
res = [item for item in dp.parse(tokens)]
tree = res[0]
print(tree)

tree.draw()
Exemplo n.º 6
0
#### Dependency grammar ####

# very simple solution following the NLTK book

# German toy grammar ("Die Frau sah den Mann mit dem Fernrohr / auf dem
# Buch"); each rule reads head -> dependents. The quoted words are runtime
# data and are kept verbatim.
dep_grammar = nltk.DependencyGrammar.fromstring("""
   'sah' -> 'Frau' | 'Mann' | 'mit' |'auf'
   'Frau' -> 'Die'
   'Mann' -> 'den'
   'mit' -> 'Fernrohr'
   'Fernrohr' -> 'dem'
   'auf' -> 'Buch'
   'Buch' -> 'dem'
   """)

dep_parser = nltk.ProjectiveDependencyParser(dep_grammar)
# NOTE(review): `tokens` must be bound earlier in the file — confirm.
trees = dep_parser.parse(tokens)
##for tree in trees:
##    print (tree)
##    tree.draw()


#### Term extraction: all nouns and proper names in the Adidas report ####

# first, open a terminology list

terms = []

# then extract all words whose POS tag starts with "N"

def termex(txt):
Exemplo n.º 7
0
The state-of-the-art dependency parser is called `maltparser`, and nltk
includes an interface to `maltparser`, but maltparser must be installed
separately. (For the sake of time we won't be using it, but you can google it.)

''')

input('Look at the following code for dependency parsing.\n')

# Toy dependency grammar for the classic ambiguous Groucho Marx sentence;
# each rule reads head -> dependents.
toy_dep_grammar = nltk.DependencyGrammar.fromstring("""
    'shot' -> 'I' | 'elephant' | 'in'
    'elephant' -> 'an' | 'in'
    'in' -> 'pajamas'
    'pajamas' -> 'my'
    """)
pdp = nltk.ProjectiveDependencyParser(toy_dep_grammar)
sent = 'I shot an elephant in my pajamas'.split()
# BUG FIX: {sent.__repr__} interpolated the bound method object
# (<built-in method __repr__ ...>) instead of the list's repr; !r gives
# the intended output.
print(f'parsing {sent!r}...')
ptrees = pdp.parse(sent)
for i, tree in enumerate(ptrees):
    print(tree, tree.height())
    # Render each parse to PostScript via nltk's TreeView canvas
    # (_cframe is a private attribute of TreeView — the usual hack for
    # non-interactive export).
    TreeView(tree)._cframe.print_to_file(f'dep_tree{i}.ps')

# BUG FIX: the hint previously said tree0.ps, but the files written above
# are named dep_tree{i}.ps.
print('to convert images, run...\n $ convert dep_tree0.ps dep_tree0.png')

input('[enter] to continue.\n')

print('\n' + '#' * 79)
print('''Constituency parsing

Some theories of syntax assume that there are implicit structures to annotate,