def markup_to_schemas(lines, cat=None, source=None):
    """Build a Schema from the markup lines of a single category.

    lines  -- markup lines; lines[0] is a header whose second
              whitespace-separated token is the category, and lines[1:]
              are the schema lines.
    cat    -- passed to fallback_schema() when the markup is unusable.
    source -- optional node; its .pos and .word are matched against the
              "POS:" / "Word:" selectors on conditional schema lines, and
              it is handed to Schema as source_node.

    If lines is empty, or any schema line still contains "/" or "\\",
    the lines are replaced by fallback_schema(cat).  In the slash case the
    category is also reported on log_out and sys.stderr.

    Schema lines ending in ")" or "}" are always kept.  Any other line is
    kept only if its "POS:" / "Word:" selectors accept the source, or if
    it is a "POS:default" / "Word:default" line and no earlier line of the
    current group has been used.  Blank lines are skipped.

    Returns Schema(selected_lines, source_node=source).
    """
    unannotated = not lines
    if not unannotated:
        for line in lines[1:]:
            if "\\" in line or "/" in line:
                # Clean the header's category up for the log message only.
                cat_to_print = lines[0].strip().split()[1]
                cat_to_print = category.strip_braces(cat_to_print)
                cat_to_print = "".join(cat_to_print.split("[X]"))
                cat_to_print = ANGLE_RE.sub("", cat_to_print)
                cat_to_print = category.remove_extra_brackets(cat_to_print)
                print >> log_out, "Unannotated category:", cat_to_print
                print >> sys.stderr, "Unannotated category:", cat_to_print
                unannotated = True
                break
    if unannotated:
        lines = fallback_schema(cat)

    pos = None
    word = None
    if source is not None:
        pos = source.pos
        word = source.word

    # 'used' records that a conditional line without an "arg" selector (or
    # with "arg:default:") has been kept since the last reset.
    used = False
    nlines = []
    for raw_line in lines[1:]:
        line = raw_line.strip()
        if not line:
            # Nothing to select on; indexing line[-1] below would raise
            # IndexError for an empty string.
            continue

        no_arg_or_default = "arg" not in line or "arg:default:" in line

        if line[-1] in ")}":
            # Unconditional line: always kept, and it resets 'used'.
            nlines.append(line)
            used = False
        else:
            use = True
            # POS is checked before Word, so a Word:default override can
            # re-enable a line that the POS selector rejected.
            for key, value in (("POS", pos), ("Word", word)):
                if key not in line:
                    continue
                if value is None or value not in line.split(key + ":")[1].split()[0].split(","):
                    use = False
                if not used and (key + ":default") in line:
                    use = True
            if use:
                nlines.append(line)
                if no_arg_or_default:
                    used = True

        # A default line resets 'used', whether or not it was kept.
        if "POS:default" in line or "Word:default" in line:
            if no_arg_or_default:
                used = False
    return Schema(nlines, source_node=source)
def get_unary(start_cat, end_cat, markedup=None):
    """Find the rules for the unary rule taking start_cat to end_cat.

    Scans UNARIES in order.  For each entry, entry[0] is the start
    category, entry[1] the end category with markup (braces are stripped
    before comparing), and entry[4] the entry's own rules.  Both ends are
    matched with category.compare().

    For a matching entry the result is:
      * entry[4], if it is non-empty;
      * [] if markedup is None;
      * markedup[end][1:], or markedup[end without square brackets][1:],
        if one of those keys is present in markedup.
    If markedup has neither key the scan moves on to the next entry.

    Returns None when no entry yields a result.
    """
    # Note: PP_qus - for questions only, ignored for now
    # NOTE(review): the "return []" case is read as belonging to the
    # "no rules and no markedup" branch -- confirm against callers.
    for unary in UNARIES:
        start = unary[0]
        end = category.strip_braces(unary[1])
        rules = unary[4]

        if not category.compare(start_cat, start):
            continue
        if not category.compare(end_cat, end):
            continue

        if len(rules) > 0:
            return rules
        if markedup is None:
            return []

        if end in markedup:
            return markedup[end][1:]
        end_no_brac = category.strip_square_brackets(end)
        if end_no_brac in markedup:
            return markedup[end_no_brac][1:]
    return None