def demo():
    """
    A demonstration of the recursive descent parser.

    Builds a small toy grammar, prints each of its productions, and then
    prints every item yielded by the parser's C{nbest_parse()} for a
    sample sentence.  All output goes to standard output; nothing is
    returned.
    """
    from nltk import cfg, parse

    toy_grammar = cfg.parse_cfg("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """)

    # Show the grammar, one production per output line.
    for production in toy_grammar.productions():
        print(production)

    # Parse a whitespace-tokenized sentence and show each result.
    tokens = 'I saw a man in the park'.split()
    rd_parser = parse.RecursiveDescentParser(toy_grammar, trace=2)
    for tree in rd_parser.nbest_parse(tokens):
        print(tree)
def demo():
    """
    Open a Tk window holding a C{CFGEditor} for a small toy grammar.

    The editor is given a callback that prints whatever grammar it is
    passed.  The window also carries a caption and a 'Quit' button that
    destroys the top-level window.  This call blocks in the Tk main loop.
    """
    from nltk import cfg

    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [
        cfg.Nonterminal(name) for name in symbol_names.split()
    ]

    toy_grammar = cfg.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    def show_grammar(edited):
        # Callback handed to the editor: just echo the grammar it receives.
        print(edited)

    root = Tk()
    # Keep a reference to the editor for as long as the main loop runs.
    editor = CFGEditor(root, toy_grammar, show_grammar)
    Label(root, text='\nTesting CFG Editor\n').pack()
    Button(root, text='Quit', command=root.destroy).pack()
    root.mainloop()
def cfg_demo():
    """
    A demonstration showing how C{Grammar}s can be created and used.

    Creates a handful of nonterminals and a production, parses a small
    grammar from a string, and prints the grammar's repr, its start
    symbol, its productions and the result of C{covers()} for two word
    lists.  All output goes to standard output; nothing is returned.
    """
    from nltk import cfg

    # Create some nonterminals
    S, NP, VP, PP = cfg.nonterminals('S, NP, VP, PP')
    N, V, P, Det = cfg.nonterminals('N, V, P, Det')
    VP_slash_NP = VP / NP

    # Each print below formats its whole line first, so the output is the
    # same as the comma-separated print statements it replaces.
    print('Some nonterminals: %s'
          % ([S, NP, VP, PP, N, V, P, Det, VP_slash_NP],))
    print(' S.symbol() => %s' % (repr(S.symbol()),))
    print('')

    print(cfg.Production(S, [NP]))

    # Create some Grammar Productions
    grammar = cfg.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | NP PP
    VP -> V NP | VP PP
    Det -> 'a' | 'the'
    N -> 'dog' | 'cat'
    V -> 'chased' | 'sat'
    P -> 'on' | 'in'
    """)

    print('A Grammar: %s' % (repr(grammar),))
    print(' grammar.start() => %s' % (repr(grammar.start()),))

    # Line-wrap the production list: start a new line after every comma
    # and indent the continuation lines by 25 spaces.
    wrapped = repr(grammar.productions()).replace(',', ',\n' + ' ' * 25)
    print(' grammar.productions() => %s' % (wrapped,))
    print('')

    print('Coverage of input words by a grammar:')
    print(grammar.covers(['a', 'dog']))
    print(grammar.covers(['a', 'toy']))
def cfg_demo():
    """
    A demonstration showing how C{Grammar}s can be created and used.

    Creates a handful of nonterminals and a production, parses a small
    grammar from a string, and prints the grammar's repr, its start
    symbol, its productions and the result of C{covers()} for two word
    lists.  All output goes to standard output; nothing is returned.
    """
    from nltk import cfg

    # Create some nonterminals
    S, NP, VP, PP = cfg.nonterminals('S, NP, VP, PP')
    N, V, P, Det = cfg.nonterminals('N, V, P, Det')
    VP_slash_NP = VP / NP

    # Each print below formats its whole line first, so the output is the
    # same as the comma-separated print statements it replaces.
    print('Some nonterminals: %s'
          % ([S, NP, VP, PP, N, V, P, Det, VP_slash_NP],))
    print(' S.symbol() => %s' % (repr(S.symbol()),))
    print('')

    print(cfg.Production(S, [NP]))

    # Create some Grammar Productions
    grammar = cfg.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | NP PP
    VP -> V NP | VP PP
    Det -> 'a' | 'the'
    N -> 'dog' | 'cat'
    V -> 'chased' | 'sat'
    P -> 'on' | 'in'
    """)

    print('A Grammar: %s' % (repr(grammar),))
    print(' grammar.start() => %s' % (repr(grammar.start()),))

    # Line-wrap the production list: start a new line after every comma
    # and indent the continuation lines by 25 spaces.
    wrapped = repr(grammar.productions()).replace(',', ',\n' + ' ' * 25)
    print(' grammar.productions() => %s' % (wrapped,))
    print('')

    print('Coverage of input words by a grammar:')
    print(grammar.covers(['a', 'dog']))
    print(grammar.covers(['a', 'toy']))
def demo():
    """
    A demonstration of the shift-reduce parser.

    Builds a small toy grammar and prints every item yielded by the
    parser's C{nbest_parse()} for a sample sentence.  All output goes to
    standard output; nothing is returned.
    """
    from nltk import cfg, parse

    toy_grammar = cfg.parse_cfg("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """)

    # Parse a whitespace-tokenized sentence and show each result.
    tokens = 'I saw a man in the park'.split()
    sr_parser = parse.ShiftReduceParser(toy_grammar, trace=2)
    for tree in sr_parser.nbest_parse(tokens):
        print(tree)
def load(resource_url, format='auto', cache=True, verbose=False):
    """
    Load a given resource from the NLTK data package.  The following
    resource formats are currently supported:
      - C{'pickle'}
      - C{'yaml'}
      - C{'cfg'} (context free grammars)
      - C{'pcfg'} (probabilistic CFGs)
      - C{'fcfg'} (feature-based CFGs)
      - C{'fol'} (formulas of First Order Logic)
      - C{'val'} (valuation of First Order Logic model)
      - C{'raw'}

    If no format is specified, C{load()} will attempt to determine a
    format based on the resource name's file extension.  If that
    fails, C{load()} will raise a C{ValueError} exception.

    @type resource_url: C{str}
    @param resource_url: A URL specifying where the resource should be
        loaded from.  The default protocol is C{"nltk:"}, which searches
        for the file in the NLTK data package.
    @type format: C{str}
    @param format: One of the format names listed above, or C{'auto'}
        (the default) to pick the format from the extension of
        C{resource_url}.
    @type cache: C{bool}
    @param cache: If true, add this resource to a cache.  If C{load}
        finds a resource in its cache, then it will return it from the
        cache rather than loading it.  The cache uses weak references,
        so a resource will automatically be expunged from the cache
        when no more objects are using it.
    @type verbose: C{bool}
    @param verbose: If true, print a message when loading a resource.
        Messages are not displayed when a resource is retrieved from
        the cache.
    @return: The loaded resource; its type depends on C{format}.
    @raise ValueError: If C{format} is not a supported format name, or
        is C{'auto'} and the extension of C{resource_url} is not
        recognized.  Nothing is opened in that case.
    """
    # If we've cached the resource, then just return it.
    # NOTE(review): the cache is keyed on the URL alone, so a cached value
    # is returned even if this call asks for a different format.
    if cache:
        resource_val = _resource_cache.get(resource_url)
        if resource_val is not None:
            if verbose:
                print('<<Using cached copy of %s>>' % (resource_url,))
            return resource_val

    # Let the user know what's going on.
    if verbose:
        print('<<Loading %s>>' % (resource_url,))

    # Determine the format of the resource from its file extension.
    if format == 'auto':
        for extension in ('pickle', 'yaml', 'cfg', 'pcfg',
                          'fcfg', 'fol', 'val'):
            if resource_url.endswith('.' + extension):
                format = extension
                break

    # Reject unknown formats before opening anything.
    if format not in ('pickle', 'yaml', 'cfg', 'pcfg',
                      'fcfg', 'fol', 'val', 'raw'):
        raise ValueError('Unknown format type!')

    # Load the resource, making sure the stream is released afterwards
    # even if reading or parsing fails.
    stream = _open(resource_url)
    try:
        if format == 'pickle':
            # NOTE(review): unpickling can execute arbitrary code; only
            # load pickles from sources you trust.
            resource_val = pickle.load(stream)
        elif format == 'yaml':
            # NOTE(review): yaml.load can construct arbitrary objects;
            # only load YAML from sources you trust.
            resource_val = yaml.load(stream)
        elif format == 'cfg':
            resource_val = cfg.parse_cfg(stream.read())
        elif format == 'pcfg':
            resource_val = cfg.parse_pcfg(stream.read())
        elif format == 'fcfg':
            resource_val = cfg.parse_fcfg(stream.read())
        elif format == 'fol':
            resource_val = sem.parse_fol(stream.read())
        elif format == 'val':
            resource_val = sem.parse_valuation(stream.read())
        else:
            # Only 'raw' is left after the validation above.
            resource_val = stream.read()
    finally:
        # Presumably _open() returns a file-like object; tolerate one
        # that has no close() method rather than failing here.
        close = getattr(stream, 'close', None)
        if close is not None:
            close()

    # If requested, add it to the cache.
    if cache:
        try:
            _resource_cache[resource_url] = resource_val
        except TypeError:
            # We can't create weak references to some object types, like
            # strings and tuples.  For now, just don't cache them.
            pass

    return resource_val
for frag2 in _generate_all(grammar, items[1:]): for frag in _multiply(frag1, frag2): frags.append(frag) return frags def _multiply(frag1, frag2): frags = [] if len(frag1) == 1: frag1 = [frag1] if len(frag2) == 1: frag2 = [frag2] for f1 in frag1: for f2 in frag2: frags.append(f1+f2) return frags grammar = cfg.parse_cfg(""" S -> NP VP NP -> Det N VP -> V NP Det -> 'the' Det -> 'a' N -> 'man' | 'park' | 'dog' | 'telescope' V -> 'saw' | 'walked' P -> 'in' | 'with' """) for sent in generate(grammar): print sent
for frag in _multiply(frag1, frag2): frags.append(frag) return frags def _multiply(frag1, frag2): frags = [] if len(frag1) == 1: frag1 = [frag1] if len(frag2) == 1: frag2 = [frag2] for f1 in frag1: for f2 in frag2: frags.append(f1 + f2) return frags grammar = cfg.parse_cfg(""" S -> NP VP NP -> Det N VP -> V NP Det -> 'the' Det -> 'a' N -> 'man' | 'park' | 'dog' | 'telescope' V -> 'saw' | 'walked' P -> 'in' | 'with' """) for sent in generate(grammar): print sent
def load(resource_url, format='auto', cache=True, verbose=False):
    """
    Load a given resource from the NLTK data package.  The following
    resource formats are currently supported:
      - C{'pickle'}
      - C{'yaml'}
      - C{'cfg'} (context free grammars)
      - C{'pcfg'} (probabilistic CFGs)
      - C{'fcfg'} (feature-based CFGs)
      - C{'fol'} (formulas of First Order Logic)
      - C{'val'} (valuation of First Order Logic model)
      - C{'raw'}

    If no format is specified, C{load()} will attempt to determine a
    format based on the resource name's file extension.  If that
    fails, C{load()} will raise a C{ValueError} exception.

    @type resource_url: C{str}
    @param resource_url: A URL specifying where the resource should be
        loaded from.  The default protocol is C{"nltk:"}, which searches
        for the file in the NLTK data package.
    @type format: C{str}
    @param format: One of the format names listed above, or C{'auto'}
        (the default) to pick the format from the extension of
        C{resource_url}.
    @type cache: C{bool}
    @param cache: If true, add this resource to a cache.  If C{load}
        finds a resource in its cache, then it will return it from the
        cache rather than loading it.  The cache uses weak references,
        so a resource will automatically be expunged from the cache
        when no more objects are using it.
    @type verbose: C{bool}
    @param verbose: If true, print a message when loading a resource.
        Messages are not displayed when a resource is retrieved from
        the cache.
    @return: The loaded resource; its type depends on C{format}.
    @raise ValueError: If C{format} is not a supported format name, or
        is C{'auto'} and the extension of C{resource_url} is not
        recognized.  Nothing is opened in that case.
    """
    # If we've cached the resource, then just return it.
    # NOTE(review): the cache is keyed on the URL alone, so a cached value
    # is returned even if this call asks for a different format.
    if cache:
        resource_val = _resource_cache.get(resource_url)
        if resource_val is not None:
            if verbose:
                print('<<Using cached copy of %s>>' % (resource_url,))
            return resource_val

    # Let the user know what's going on.
    if verbose:
        print('<<Loading %s>>' % (resource_url,))

    # Determine the format of the resource from its file extension.
    if format == 'auto':
        for extension in ('pickle', 'yaml', 'cfg', 'pcfg',
                          'fcfg', 'fol', 'val'):
            if resource_url.endswith('.' + extension):
                format = extension
                break

    # Reject unknown formats before opening anything.
    if format not in ('pickle', 'yaml', 'cfg', 'pcfg',
                      'fcfg', 'fol', 'val', 'raw'):
        raise ValueError('Unknown format type!')

    # Load the resource, making sure the stream is released afterwards
    # even if reading or parsing fails.
    stream = _open(resource_url)
    try:
        if format == 'pickle':
            # NOTE(review): unpickling can execute arbitrary code; only
            # load pickles from sources you trust.
            resource_val = pickle.load(stream)
        elif format == 'yaml':
            # NOTE(review): yaml.load can construct arbitrary objects;
            # only load YAML from sources you trust.
            resource_val = yaml.load(stream)
        elif format == 'cfg':
            resource_val = cfg.parse_cfg(stream.read())
        elif format == 'pcfg':
            resource_val = cfg.parse_pcfg(stream.read())
        elif format == 'fcfg':
            resource_val = cfg.parse_fcfg(stream.read())
        elif format == 'fol':
            resource_val = sem.parse_fol(stream.read())
        elif format == 'val':
            resource_val = sem.parse_valuation(stream.read())
        else:
            # Only 'raw' is left after the validation above.
            resource_val = stream.read()
    finally:
        # Presumably _open() returns a file-like object; tolerate one
        # that has no close() method rather than failing here.
        close = getattr(stream, 'close', None)
        if close is not None:
            close()

    # If requested, add it to the cache.
    if cache:
        try:
            _resource_cache[resource_url] = resource_val
        except TypeError:
            # We can't create weak references to some object types, like
            # strings and tuples.  For now, just don't cache them.
            pass

    return resource_val