Exemplo n.º 1
0
        #   one for the rest of possessives
        #   one for punctuation
        r"[-\w]+|'\w+|[^-\w\s]+",
        tokenstring,
        re.U # Use unicode classes, otherwise we would split
             # "são jaques" into ["s", "ão","jaques"]
        )

# Toy context-free grammar for the CKY parsing assignment.
# Sentences are NP VP; NPs allow possessive determiners (NP "'s"),
# PP attachment and subject relative clauses (SRel).
# NOTE: 'fish' is deliberately ambiguous between N and Vi.
grammar=parse_grammar("""
S -> NP VP
NP -> Det Nom | Nom | NP PP
Det -> NP "'s"
Nom -> N SRel | N
VP -> Vi | Vt NP | VP PP
PP -> Prep NP
SRel -> Relpro VP
Det -> 'a' | 'the'
N -> 'fish' | 'frogs' | 'soup' | 'children' | 'books'
Prep -> 'in' | 'for'
Vt -> 'saw' | 'ate' | 'read'
Vi -> 'fish' | 'swim'
Relpro -> 'that'
""")

# Use this grammar for the rest of the assignment
grammar2=parse_grammar([
"S -> Sdecl '.' | Simp '.' | Sq '?' ",
"Sdecl -> NP VP",
"Simp -> VP",
"Sq -> Sqyn | Swhadv",
"Sqyn -> Mod Sdecl | Aux Sdecl",
Exemplo n.º 2
0
def tokenise(tokenstring):
    '''Split a string into a list of tokens, treating punctuation as
    separate tokens, and splitting contractions into their parts.
    So for example "I'm leaving." --> ["I","'m","leaving","."]

    Returns a list of strings; characters matching none of the three
    sub-patterns (e.g. whitespace) are dropped.'''
    # Fixed inconsistent indentation (docstring at 2 spaces, return at 4)
    # which made the original an IndentationError.
    # Three alternatives: words, contraction suffixes ('m, 's, ...),
    # and single punctuation characters.
    return re.findall(r"[a-zA-Z]+|'[a-z]+|[,.?;:()-]", tokenstring)


# Toy context-free grammar for the CKY parsing assignment.
# Sentences are NP VP; NPs allow possessive determiners (NP "'s"),
# PP attachment and subject relative clauses (SRel).
# NOTE: 'fish' is deliberately ambiguous between N and Vi.
grammar=parse_grammar("""
S -> NP VP
NP -> Det Nom | Nom | NP PP
Det -> NP "'s"
Nom -> N SRel | N
VP -> Vi | Vt NP | VP PP
PP -> Prep NP
SRel -> Relpro VP
Det -> 'a' | 'the'
N -> 'fish' | 'frogs' | 'soup' | 'children' | 'books'
Prep -> 'in' | 'for'
Vt -> 'saw' | 'ate' | 'read'
Vi -> 'fish' | 'swim'
Relpro -> 'that'
""")

# NOTE(review): original used the Python 2 print *statement*, a syntax
# error under Python 3; converted to the print() function, which behaves
# identically for a single argument on both versions.
print(grammar)
chart=CKY(grammar)
chart.parse("the frogs swim".split()) # Should use
                                      # tokenise(s) once that's fixed
chart.pprint()
# Use this grammar for the rest of the assignment
Exemplo n.º 3
0
        # You will need three sub-patterns:
        #   one for words and the first half of possessives
        #   one for the rest of possessives
        #   one for punctuation
        r'\b[a-zA-Z]+|\'?[a-zA-Z]+|[^ ]+',
        tokenstring)


# Toy context-free grammar for the CKY parsing assignment.
# Sentences are NP VP; NPs allow possessive determiners (NP "'s"),
# PP attachment and subject relative clauses (SRel).
# NOTE: 'fish' is deliberately ambiguous between N and Vi.
grammar = parse_grammar("""
S -> NP VP
NP -> Det Nom | Nom | NP PP
Det -> NP "'s"
Nom -> N SRel | N
VP -> Vi | Vt NP | VP PP
PP -> Prep NP
SRel -> Relpro VP
Det -> 'a' | 'the'
N -> 'fish' | 'frogs' | 'soup' | 'children' | 'books'
Prep -> 'in' | 'for'
Vt -> 'saw' | 'ate' | 'read'
Vi -> 'fish' | 'swim'
Relpro -> 'that'
""")

# Smoke-test the grammar: build a CKY chart and run recognition on a
# hand-tokenised sentence, then dump the chart.
print(grammar)
chart = CKY(grammar)
# presumably recognise() returns accept/reject rather than a parse —
# TODO confirm against the CKY class
chart.recognise("the frogs swim".split())  # Should use
# tokenise(s) once that's fixed
chart.pprint()

# Q1: Uncomment this once you've completed Q1
Exemplo n.º 4
0
        mods = dict(
            zip(modNames[:len(modFilenames)],
                [importlib.import_module(n) for n in modFilenames]))
    except (ModuleNotFoundError, ImportError) as e:
        print("Filenames must be importable: %s" % e, file=sys.stderr)
        exit(2)

from cfg_fix import parse_grammar, Tree
# Toy context-free grammar shared with the students' assignment code.
# Sentences are NP VP; NPs allow possessive determiners (NP "'s"),
# PP attachment and subject relative clauses (SRel).
# NOTE: 'fish' is deliberately ambiguous between N and Vi.
grammar = parse_grammar("""
S -> NP VP
NP -> Det Nom | Nom | NP PP
Det -> NP "'s"
Nom -> N SRel | N
VP -> Vi | Vt NP | VP PP
PP -> Prep NP
SRel -> Relpro VP
Det -> 'a' | 'the'
N -> 'fish' | 'frogs' | 'soup' | 'children' | 'books'
Prep -> 'in' | 'for'
Vt -> 'saw' | 'ate' | 'read'
Vi -> 'fish' | 'swim'
Relpro -> 'that'
""")  #'
# Build a chart from the student's dynamically imported 'cky' module.
chart = mods['cky'].CKY(grammar)


def callCount(modName, methodName):
    """Roughly count source lines in module mods[modName] that invoke
    *methodName* as a method, i.e. lines containing ``.methodName(``.

    The ``[^#]*`` prefix is a heuristic to skip calls that only appear
    after a comment marker; it is not a real Python parser.
    """
    # Raw strings + re.escape: the original concatenated plain strings
    # containing the invalid escape sequences '\.' and '\(' (a
    # DeprecationWarning since Python 3.6), and would misbehave for a
    # methodName containing regex metacharacters.
    pat = re.compile(r'[^#]*\.' + re.escape(methodName) + r'\(')
    return sum(1 for l in inspect.getsourcelines(mods[modName])[0]
               if pat.match(l))