def parsePrimitiveCategory(chunks, primitives, families, var):
    """
    Resolve one primitive-category chunk into a ``(category, var)`` pair.

    ``chunks[0]`` is the category name and ``chunks[1]`` its optional
    subscript part.  The name is resolved in this order:

    1. The special name ``'var'`` with no subscript part stands for the
       category variable: ``var`` is reused if given, otherwise a fresh
       ``CCGVar`` is created; it is returned as both elements of the pair.
    2. A name found in ``families`` yields that family's category.  If a
       ``var`` is already in play, the family's own variable is substituted
       by it; otherwise the family's variable becomes the current one.
    3. A name found in ``primitives`` yields a ``PrimitiveCategory`` built
       from the name and its parsed subscripts; ``var`` is passed through.

    :raise AssertionError: if the name is neither a family nor a primitive.
    """
    name = chunks[0]

    # A bare 'var' (no subscripts) denotes the category variable itself.
    # A subscripted 'var' falls through to the family/primitive lookup.
    if name == "var" and chunks[1] is None:
        if var is None:
            var = CCGVar()
        return (var, var)

    if name in families:
        (family_cat, family_var) = families[name]
        if var is None:
            # No variable bound yet: adopt the family's variable unchanged.
            return (family_cat, family_var)
        # Rebind the family's variable to the one already in use.
        return (family_cat.substitute([(family_var, var)]), var)

    if name not in primitives:
        raise AssertionError(
            "String '" + name + "' is neither a family nor primitive category."
        )

    return (PrimitiveCategory(name, parseSubscripts(chunks[1])), var)
def fromstring(lex_str, include_semantics=False):
    """
    Convert string representation into a lexicon for CCGs.

    Each line of ``lex_str`` is processed on its own after comments and
    surrounding whitespace have been stripped; empty lines are skipped.
    Three kinds of line are recognised:

    - ``:- S, N, NP, VP`` declares primitive categories (comma separated).
      Several such lines accumulate.  The first primitive declared is the
      target category and is passed first to ``CCGLexicon``.
    - ``Det :: NP/N`` (separator ``::``) defines a family: the identifier
      is bound to the parsed category and its variable.
    - A line with any other separator is a word definition; its parsed
      category is appended to that word's list of entries.

    :param lex_str: the textual lexicon, one definition per line.
    :param include_semantics: when ``True``, every word definition must
        carry a semantics part, which is parsed with
        ``Expression.fromstring`` and stored on the token.  Otherwise the
        tokens are created with ``None`` as semantics.
    :return: a ``CCGLexicon`` built from the primitives, families and
        entries that were read.
    :raise AssertionError: if a line cannot be parsed as one of the three
        kinds above, if a word lacks semantics while ``include_semantics``
        is ``True``, or if no primitive category was declared at all.
    """
    CCGVar.reset_id()
    primitives = []
    families = {}
    entries = defaultdict(list)
    for line in lex_str.splitlines():
        # Strip comments and leading/trailing whitespace.
        line = COMMENTS_RE.match(line).groups()[0].strip()
        if not line:
            continue

        if line.startswith(":-"):
            # A line of primitive categories.
            # The first one is the target category
            # ie, :- S, N, NP, VP
            primitives.extend(
                prim.strip() for prim in line[2:].strip().split(",")
            )
            continue

        # Either a family definition, or a word definition.
        # A failed match would otherwise surface as an AttributeError on
        # None, which hides the offending lexicon line from the user.
        lex_match = LEX_RE.match(line)
        if lex_match is None:
            raise AssertionError(
                "Line '" + line + "' is not a valid family or word definition."
            )
        (ident, sep, rhs) = lex_match.groups()

        rhs_match = RHS_RE.match(rhs)
        if rhs_match is None:
            raise AssertionError(
                "Line '" + line + "' has a malformed right-hand side."
            )
        (catstr, semantics_str) = rhs_match.groups()
        (cat, var) = augParseCategory(catstr, primitives, families)

        if sep == "::":
            # Family definition
            # ie, Det :: NP/N
            families[ident] = (cat, var)
            continue

        semantics = None
        # Identity test kept on purpose: only the literal True enables
        # semantics parsing, exactly as before.
        if include_semantics is True:
            if semantics_str is None:
                raise AssertionError(
                    line
                    + " must contain semantics because include_semantics is set to True"
                )
            semantics_match = SEMANTICS_RE.match(semantics_str)
            if semantics_match is None:
                raise AssertionError(
                    "Line '" + line + "' has a malformed semantics part."
                )
            semantics = Expression.fromstring(semantics_match.groups()[0])

        # Word definition
        # ie, which => (N\N)/(S/NP)
        entries[ident].append(Token(ident, cat, semantics))

    # Without any ':-' line there is no target category to hand over;
    # report that explicitly instead of failing on primitives[0].
    if not primitives:
        raise AssertionError(
            "Lexicon must declare at least one primitive category on a ':-' line."
        )
    return CCGLexicon(primitives[0], primitives, families, entries)