Exemplo n.º 1
0
#### MEMORY-BASED SHALLOW PARSER ######################################################################

# Copyright (c) 2003-2010 University of Antwerp, Belgium and Tilburg University, The Netherlands
# License: GNU General Public License, see LICENSE.txt

######################################################################################################

# Add the upper directory (where the MBSP module is) to the search path.
import os, sys; sys.path.insert(0, os.path.join("..", ".."))
import MBSP

if not MBSP.config.autostart:
    MBSP.start()

s = MBSP.parse("I ate many slices of pizza with a fork.")
s = MBSP.split(s)

# A useful operation is to extract the heads in a sentence,
# for example to create a "normalized" sentence, or to construct a Timbl lookup instance.
# A head is the principal word in a chunk.
# We could retrieve the heads by iterating over Sentence.chunks, 
# but this would skip the loose words in between chunks (e.g. "and" or ","),
# which can also be useful, particularly in the case of contructing a lookup instance.
# Sentence.constituents() returns an in-order list of mixed Chunk and Word objects 
# that can be used for this purpose:
heads = []
for p in s[0].constituents(pnp=False):
    if isinstance(p, MBSP.Word):
        heads.append((
            p.index, 
            p.lemma))