Пример #1
0
def demo():
    """
    Run the recursive descent parser on a small example sentence.

    A toy grammar is built and each of its productions is printed.  The
    sentence 'I saw a man in the park' is then handed to the parser and
    every parse it returns is printed.
    """

    from nltk import parse, cfg

    grammar_text = """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    demo_grammar = cfg.parse_cfg(grammar_text)

    # List the rules before any parsing starts.
    for production in demo_grammar.productions():
        print(production)

    tokens = 'I saw a man in the park'.split()
    # trace=2 is handed straight to the parser's constructor.
    rd_parser = parse.RecursiveDescentParser(demo_grammar, trace=2)
    for tree in rd_parser.nbest_parse(tokens):
        print(tree)
Пример #2
0
def demo():
    """
    Open a Tk window holding a CFG editor loaded with a sample grammar.

    The editor is given a callback that prints the grammar it receives.
    After a label and a 'Quit' button are packed into the window, the
    function enters the Tk main loop.
    """
    from nltk import cfg

    # One Nonterminal per symbol name.  None of these objects is
    # referenced again further down in this function.
    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [
        cfg.Nonterminal(symbol) for symbol in symbol_names.split()]

    grammar_text = """
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """
    sample_grammar = cfg.parse_cfg(grammar_text)

    def show_grammar(grammar):
        # Callback handed to the editor: just echo the grammar.
        print(grammar)

    window = Tk()
    # Keep the editor bound to a local name for the lifetime of the loop.
    cfg_editor = CFGEditor(window, sample_grammar, show_grammar)
    Label(window, text='\nTesting CFG Editor\n').pack()
    Button(window, text='Quit', command=window.destroy).pack()
    window.mainloop()
Пример #3
0
def cfg_demo():
    """
    Print a short walk-through of nonterminals, productions and grammars.

    Everything is written to standard output; nothing is returned.
    """

    from nltk import cfg

    # Create some nonterminals
    S, NP, VP, PP = cfg.nonterminals('S, NP, VP, PP')
    N, V, P, Det = cfg.nonterminals('N, V, P, Det')
    VP_slash_NP = VP/NP

    symbols = [S, NP, VP, PP, N, V, P, Det, VP/NP]
    print(' '.join(('Some nonterminals:', str(symbols))))
    print(' '.join(('    S.symbol() =>', repr(S.symbol()))))
    print('')

    print(cfg.Production(S, [NP]))

    # Create some Grammar Productions
    grammar_text = """
      S -> NP VP
      PP -> P NP
      NP -> Det N | NP PP
      VP -> V NP | VP PP
      Det -> 'a' | 'the'
      N -> 'dog' | 'cat'
      V -> 'chased' | 'sat'
      P -> 'on' | 'in'
    """
    grammar = cfg.parse_cfg(grammar_text)

    print(' '.join(('A Grammar:', repr(grammar))))
    print(' '.join(('    grammar.start()       =>', repr(grammar.start()))))
    # Break the production list after every comma so the output wraps;
    # continuation lines are padded with 25 spaces.
    wrapped = repr(grammar.productions()).replace(',', ',\n'+' '*25)
    print(' '.join(('    grammar.productions() =>', wrapped)))
    print('')

    print('Coverage of input words by a grammar:')
    for words in (['a','dog'], ['a','toy']):
        print(grammar.covers(words))
Пример #4
0
def cfg_demo():
    """
    Show, by printing, how nonterminals, productions and grammars are
    built and inspected.

    Output goes to standard output; the function returns nothing.
    """

    from nltk import cfg

    def show(label, text):
        # Emit "label text" on one line, separated by a single space.
        print(' '.join((label, text)))

    # Create some nonterminals
    S, NP, VP, PP = cfg.nonterminals('S, NP, VP, PP')
    N, V, P, Det = cfg.nonterminals('N, V, P, Det')
    VP_slash_NP = VP / NP

    show('Some nonterminals:', str([S, NP, VP, PP, N, V, P, Det, VP / NP]))
    show('    S.symbol() =>', repr(S.symbol()))
    print('')

    print(cfg.Production(S, [NP]))

    # Create some Grammar Productions
    rules = """
      S -> NP VP
      PP -> P NP
      NP -> Det N | NP PP
      VP -> V NP | VP PP
      Det -> 'a' | 'the'
      N -> 'dog' | 'cat'
      V -> 'chased' | 'sat'
      P -> 'on' | 'in'
    """
    grammar = cfg.parse_cfg(rules)

    show('A Grammar:', repr(grammar))
    show('    grammar.start()       =>', repr(grammar.start()))
    # Insert a newline plus 25 spaces of padding after each comma so the
    # long production list is line-wrapped.
    continuation = ',\n' + ' ' * 25
    show('    grammar.productions() =>',
         repr(grammar.productions()).replace(',', continuation))
    print('')

    print('Coverage of input words by a grammar:')
    print(grammar.covers(['a', 'dog']))
    print(grammar.covers(['a', 'toy']))
Пример #5
0
def demo():
    """
    Run the shift-reduce parser on a small example sentence.

    A toy grammar is built, the sentence 'I saw a man in the park' is
    handed to the parser, and every parse it returns is printed.
    """

    from nltk import parse, cfg

    grammar_text = """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    demo_grammar = cfg.parse_cfg(grammar_text)

    tokens = 'I saw a man in the park'.split()

    # trace=2 is handed straight to the parser's constructor.
    sr_parser = parse.ShiftReduceParser(demo_grammar, trace=2)
    for tree in sr_parser.nbest_parse(tokens):
        print(tree)
Пример #6
0
def demo():
    """
    Demonstrate the shift-reduce parser.

    Parses the sentence 'I saw a man in the park' against a small toy
    grammar and prints each parse the parser returns.
    """

    from nltk import parse, cfg

    rules = """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    toy_grammar = cfg.parse_cfg(rules)

    words = 'I saw a man in the park'.split()

    # The trace level (2) is forwarded unchanged to the parser.
    shift_reduce = parse.ShiftReduceParser(toy_grammar, trace=2)
    for parse_tree in shift_reduce.nbest_parse(words):
        print(parse_tree)
Пример #7
0
def load(resource_url, format='auto', cache=True, verbose=False):
    """
    Load a given resource from the NLTK data package.  The following
    resource formats are currently supported:
      - C{'pickle'}
      - C{'yaml'}
      - C{'cfg'} (context free grammars)
      - C{'pcfg'} (probabilistic CFGs)
      - C{'fcfg'} (feature-based CFGs)
      - C{'fol'} (formulas of First Order Logic)
      - C{'val'} (valuation of First Order Logic model)
      - C{'raw'}

    If no format is specified, C{load()} will attempt to determine a
    format based on the resource name's file extension.  If that
    fails, C{load()} will raise a C{ValueError} exception.

    @type resource_url: C{str}
    @param resource_url: A URL specifying where the resource should be
        loaded from.  The default protocol is C{"nltk:"}, which searches
        for the file in the NLTK data package.
    @type format: C{str}
    @param format: One of the format names listed above, or C{'auto'}
        (the default) to choose the format from the file extension of
        C{resource_url}.  C{'raw'} is never chosen automatically; it
        must be requested explicitly.
    @type cache: C{bool}
    @param cache: If true, add this resource to a cache.  If C{load}
        finds a resource in its cache, then it will return it from the
        cache rather than loading it.  The cache uses weak references,
        so a resource will automatically be expunged from the cache
        when no more objects are using it.  Cache entries are looked
        up by C{resource_url} alone; C{format} is not part of the key.
    @type verbose: C{bool}
    @param verbose: If true, print a message when loading a resource.
        Messages are not displayed when a resource is retrieved from
        the cache.
    @return: The loaded resource.  For C{'raw'} this is whatever
        C{read()} on the opened resource returns.
    @raise ValueError: If C{format} is C{'auto'} and the extension of
        C{resource_url} is not recognized, or if C{format} is not one
        of the supported format names.
    """
    # If we've cached the resource, then just return it.
    if cache:
        resource_val = _resource_cache.get(resource_url)
        if resource_val is not None:
            if verbose:
                print('<<Using cached copy of %s>>' % (resource_url,))
            return resource_val

    # Let the user know what's going on.
    if verbose:
        print('<<Loading %s>>' % (resource_url,))

    # Determine the format of the resource.  Every auto-detected format
    # shares its name with the file extension that selects it.
    if format == 'auto':
        for extension in ('pickle', 'yaml', 'cfg', 'pcfg', 'fcfg',
                          'fol', 'val'):
            if resource_url.endswith('.' + extension):
                format = extension
                break
        else:
            raise ValueError(
                'Could not determine format for %s based on its file '
                'extension; use the "format" argument to specify the '
                'format explicitly.' % (resource_url,))
    elif format not in ('pickle', 'yaml', 'cfg', 'pcfg', 'fcfg',
                        'fol', 'val', 'raw'):
        # Reject unknown formats before anything is opened.
        raise ValueError('Unknown format type!')

    # Load the resource, making sure the stream is released afterwards
    # even if reading or parsing fails.
    stream = _open(resource_url)
    try:
        if format == 'pickle':
            # NOTE(security): unpickling can execute arbitrary code; only
            # load pickles that come from a trusted source.
            resource_val = pickle.load(stream)
        elif format == 'yaml':
            # NOTE(security): yaml.load can construct arbitrary objects;
            # only load YAML that comes from a trusted source.
            resource_val = yaml.load(stream)
        elif format == 'raw':
            resource_val = stream.read()
        else:
            # The remaining formats are all parsed from the full text.
            text = stream.read()
            if format == 'cfg':
                resource_val = cfg.parse_cfg(text)
            elif format == 'pcfg':
                resource_val = cfg.parse_pcfg(text)
            elif format == 'fcfg':
                resource_val = cfg.parse_fcfg(text)
            elif format == 'fol':
                resource_val = sem.parse_fol(text)
            else:
                resource_val = sem.parse_valuation(text)
    finally:
        # NOTE(review): _open() is defined elsewhere; it is assumed to
        # return a file-like object.  Close it only if it can be closed.
        close = getattr(stream, 'close', None)
        if close is not None:
            close()

    # If requested, add it to the cache.
    if cache:
        try:
            _resource_cache[resource_url] = resource_val
        except TypeError:
            # We can't create weak references to some object types, like
            # strings and tuples.  For now, just don't cache them.
            pass

    return resource_val
Пример #8
0
            for frag2 in _generate_all(grammar, items[1:]):
                for frag in _multiply(frag1, frag2):
                    frags.append(frag)
    return frags
            
def _multiply(frag1, frag2):
    frags = []
    if len(frag1) == 1:
        frag1 = [frag1]
    if len(frag2) == 1:
        frag2 = [frag2]
    for f1 in frag1:
        for f2 in frag2:
            frags.append(f1+f2)
    return frags

# Toy grammar used to exercise generate() below.  The P rules are not
# referenced by any other rule in this grammar.
grammar = cfg.parse_cfg("""
  S -> NP VP
  NP -> Det N
  VP -> V NP
  Det -> 'the'
  Det -> 'a'
  N -> 'man' | 'park' | 'dog' | 'telescope'
  V -> 'saw' | 'walked'
  P -> 'in' | 'with'
""")

# Print each item that generate() produces for the grammar, one per line.
for sent in generate(grammar):
    print(sent)
    
Пример #9
0
                for frag in _multiply(frag1, frag2):
                    frags.append(frag)
    return frags


def _multiply(frag1, frag2):
    frags = []
    if len(frag1) == 1:
        frag1 = [frag1]
    if len(frag2) == 1:
        frag2 = [frag2]
    for f1 in frag1:
        for f2 in frag2:
            frags.append(f1 + f2)
    return frags


# Small example grammar fed to generate() below.  No other rule in it
# refers to P.
grammar = cfg.parse_cfg("""
  S -> NP VP
  NP -> Det N
  VP -> V NP
  Det -> 'the'
  Det -> 'a'
  N -> 'man' | 'park' | 'dog' | 'telescope'
  V -> 'saw' | 'walked'
  P -> 'in' | 'with'
""")

# Write out, one per line, whatever generate() produces for the grammar.
for sent in generate(grammar):
    print(sent)
Пример #10
0
def load(resource_url, format='auto', cache=True, verbose=False):
    """
    Load a given resource from the NLTK data package.  The following
    resource formats are currently supported:
      - C{'pickle'}
      - C{'yaml'}
      - C{'cfg'} (context free grammars)
      - C{'pcfg'} (probabilistic CFGs)
      - C{'fcfg'} (feature-based CFGs)
      - C{'fol'} (formulas of First Order Logic)
      - C{'val'} (valuation of First Order Logic model)
      - C{'raw'}

    If no format is specified, C{load()} will attempt to determine a
    format based on the resource name's file extension.  If that
    fails, C{load()} will raise a C{ValueError} exception.

    @type resource_url: C{str}
    @param resource_url: A URL specifying where the resource should be
        loaded from.  The default protocol is C{"nltk:"}, which searches
        for the file in the NLTK data package.
    @type format: C{str}
    @param format: One of the format names listed above, or C{'auto'}
        (the default) to choose the format from the file extension of
        C{resource_url}.  C{'raw'} is never chosen automatically; it
        must be requested explicitly.
    @type cache: C{bool}
    @param cache: If true, add this resource to a cache.  If C{load}
        finds a resource in its cache, then it will return it from the
        cache rather than loading it.  The cache uses weak references,
        so a resource will automatically be expunged from the cache
        when no more objects are using it.  Cache entries are looked
        up by C{resource_url} alone; C{format} is not part of the key.
    @type verbose: C{bool}
    @param verbose: If true, print a message when loading a resource.
        Messages are not displayed when a resource is retrieved from
        the cache.
    @return: The loaded resource.  For C{'raw'} this is whatever
        C{read()} on the opened resource returns.
    @raise ValueError: If C{format} is C{'auto'} and the extension of
        C{resource_url} is not recognized, or if C{format} is not one
        of the supported format names.
    """
    # If we've cached the resource, then just return it.
    if cache:
        resource_val = _resource_cache.get(resource_url)
        if resource_val is not None:
            if verbose:
                print('<<Using cached copy of %s>>' % (resource_url, ))
            return resource_val

    # Let the user know what's going on.
    if verbose:
        print('<<Loading %s>>' % (resource_url, ))

    # Determine the format of the resource.  Every auto-detected format
    # shares its name with the file extension that selects it.
    if format == 'auto':
        for extension in ('pickle', 'yaml', 'cfg', 'pcfg', 'fcfg',
                          'fol', 'val'):
            if resource_url.endswith('.' + extension):
                format = extension
                break
        else:
            raise ValueError(
                'Could not determine format for %s based on its file '
                'extension; use the "format" argument to specify the '
                'format explicitly.' % (resource_url, ))
    elif format not in ('pickle', 'yaml', 'cfg', 'pcfg', 'fcfg',
                        'fol', 'val', 'raw'):
        # Reject unknown formats before anything is opened.
        raise ValueError('Unknown format type!')

    # Load the resource, making sure the stream is released afterwards
    # even if reading or parsing fails.
    stream = _open(resource_url)
    try:
        if format == 'pickle':
            # NOTE(security): unpickling can execute arbitrary code; only
            # load pickles that come from a trusted source.
            resource_val = pickle.load(stream)
        elif format == 'yaml':
            # NOTE(security): yaml.load can construct arbitrary objects;
            # only load YAML that comes from a trusted source.
            resource_val = yaml.load(stream)
        elif format == 'raw':
            resource_val = stream.read()
        else:
            # The remaining formats are all parsed from the full text.
            text = stream.read()
            if format == 'cfg':
                resource_val = cfg.parse_cfg(text)
            elif format == 'pcfg':
                resource_val = cfg.parse_pcfg(text)
            elif format == 'fcfg':
                resource_val = cfg.parse_fcfg(text)
            elif format == 'fol':
                resource_val = sem.parse_fol(text)
            else:
                resource_val = sem.parse_valuation(text)
    finally:
        # NOTE(review): _open() is defined elsewhere; it is assumed to
        # return a file-like object.  Close it only if it can be closed.
        close = getattr(stream, 'close', None)
        if close is not None:
            close()

    # If requested, add it to the cache.
    if cache:
        try:
            _resource_cache[resource_url] = resource_val
        except TypeError:
            # We can't create weak references to some object types, like
            # strings and tuples.  For now, just don't cache them.
            pass

    return resource_val