def markup_to_schemas(lines, cat=None, source=None):
    """Build a Schema from the markup lines of a single entry.

    ``lines[0]`` is the entry header; it is never copied into the schema.
    The remaining lines are filtered (see below) and passed to ``Schema``
    together with ``source``.

    Fallback: if ``lines`` is empty (or None), or if any line after the
    header contains a forward or backward slash, the entry is treated as
    unannotated and ``lines`` is replaced by ``fallback_schema(cat)``.  In
    the slash case the second whitespace-separated token of the header is
    cleaned up and reported on both ``log_out`` and stderr.

    Filtering, applied to every stripped, non-blank line after the header:

    * A line ending in ')' or '}' is always kept and clears the ``used``
      flag.
    * Any other line is kept unless a "POS:" or "Word:" condition on it
      fails.  A condition lists comma-separated values and passes when
      ``source.pos`` / ``source.word`` is one of them.  A "POS:default" or
      "Word:default" line is kept whenever ``used`` is not set.
    * Keeping such a line sets ``used``, and a default line clears it
      again afterwards, in both cases only if the line has no "arg" or has
      "arg:default:".

    Args:
        lines: sequence of markup strings, header first.
        cat: passed to ``fallback_schema`` when the entry is unannotated.
        source: optional object providing ``pos`` and ``word`` attributes;
            when None, only default conditions can match.

    Returns:
        ``Schema(kept_lines, source_node=source)``.
    """
    unannotated = not lines
    if not unannotated:
        for line in lines[1:]:
            if "\\" in line or "/" in line:
                cat_to_print = lines[0].strip().split()[1]
                cat_to_print = category.strip_braces(cat_to_print)
                cat_to_print = "".join(cat_to_print.split("[X]"))
                cat_to_print = ANGLE_RE.sub("", cat_to_print)
                cat_to_print = category.remove_extra_brackets(cat_to_print)
                print >> log_out, "Unannotated category:", cat_to_print
                print >> sys.stderr, "Unannotated category:", cat_to_print
                unannotated = True
                break
    if unannotated:
        lines = fallback_schema(cat)

    pos = source.pos if source is not None else None
    word = source.word if source is not None else None
    conditions = (("POS", pos), ("Word", word))

    used = False
    nlines = []
    for raw in lines[1:]:
        line = raw.strip()
        if not line:
            # Blank lines carry no markup; indexing line[-1] would fail.
            continue

        # Only lines without an "arg" marker, or with "arg:default:", are
        # allowed to change the `used` flag.
        affects_used = "arg" not in line or "arg:default:" in line

        if line[-1] in ")}":
            nlines.append(line)
            used = False
        else:
            use = True
            # POS is checked before Word, so a Word result can override
            # the POS result (and a default can re-enable a failed match).
            for key, value in conditions:
                if key not in line:
                    continue
                allowed = key + ":"
                if value is None or value not in line.split(allowed)[1].split()[0].split(","):
                    use = False
                if not used and (allowed + "default") in line:
                    use = True
            if use:
                nlines.append(line)
                if affects_used:
                    used = True

        # A default line ends the current run of alternatives, so the next
        # run starts with `used` cleared.
        if ("POS:default" in line or "Word:default" in line) and affects_used:
            used = False
    return Schema(nlines, source_node=source)
# Example #2
0
def get_unary(start_cat, end_cat, markedup=None):
    """Find the rules for the unary change from start_cat to end_cat.

    Each entry of UNARIES is read by index: 0 is the start category, 1 is
    the end category markup (braces are removed with
    ``category.strip_braces`` before comparing) and 4 is the entry's rules.
    Entries 2 and 3 are not needed here.

    Args:
        start_cat: category compared against each entry's start category.
        end_cat: category compared against each entry's end category.
        markedup: optional mapping from category to a sequence whose first
            element is skipped when it is returned.

    Returns:
        For the first matching entry that yields a result:
        * the entry's own rules, when it has any;
        * otherwise ``markedup[end][1:]``, trying the end category first
          as is and then with square brackets stripped;
        * otherwise ``[]`` when no ``markedup`` was supplied.
        If ``markedup`` was supplied but holds neither key, the search
        continues with later entries.  None when nothing matches.
    """
    # Note: PP_qus - for questions only, ignored for now
    for unary in UNARIES:
        start = unary[0]
        end = category.strip_braces(unary[1])
        rules = unary[4]
        if not category.compare(start_cat, start):
            continue
        if not category.compare(end_cat, end):
            continue
        if rules:
            return rules
        if markedup is None:
            return []
        if end in markedup:
            return markedup[end][1:]
        end_no_brac = category.strip_square_brackets(end)
        if end_no_brac in markedup:
            return markedup[end_no_brac][1:]
    return None
# Example #3
0
def get_unary(start_cat, end_cat, markedup=None):
	"""Find the rules for the unary change from start_cat to end_cat.

	Each entry of UNARIES is read by index: 0 is the start category, 1 is
	the end category markup (braces are removed with
	``category.strip_braces`` before comparing) and 4 is the entry's rules.
	Entries 2 and 3 are not needed here.

	Args:
		start_cat: category compared against each entry's start category.
		end_cat: category compared against each entry's end category.
		markedup: optional mapping from category to a sequence whose first
			element is skipped when it is returned.

	Returns:
		For the first matching entry that yields a result:
		* the entry's own rules, when it has any;
		* otherwise ``markedup[end][1:]``, trying the end category first
		  as is and then with square brackets stripped;
		* otherwise ``[]`` when no ``markedup`` was supplied.
		If ``markedup`` was supplied but holds neither key, the search
		continues with later entries.  None when nothing matches.
	"""
	# Note: PP_qus - for questions only, ignored for now
	for unary in UNARIES:
		start = unary[0]
		end = category.strip_braces(unary[1])
		rules = unary[4]
		if not category.compare(start_cat, start):
			continue
		if not category.compare(end_cat, end):
			continue
		if rules:
			return rules
		if markedup is None:
			return []
		if end in markedup:
			return markedup[end][1:]
		end_no_brac = category.strip_square_brackets(end)
		if end_no_brac in markedup:
			return markedup[end_no_brac][1:]
	return None