Exemplo n.º 1
0
def handle_lines(lines, writer):
    """
    Read JSON records from `lines` and emit edges through `output_edge`.

    Blank lines and lines starting with '[' are skipped. Every other line
    is parsed as a JSON object. One record at a time is held back as
    "pending":

    - When the next record has the same 'weight' as the pending one, the
      pending record is emitted and the next record itself is discarded.
      Before emitting, the pending record takes over the next record's
      'rel' if both records have equal normalized 'arg1' and 'arg2'.
    - Otherwise the pending record (if any) is emitted unchanged and the
      new record becomes pending, with its 'rel' copied to 'surfaceRel'.

    Any record still pending at the end is emitted, and `writer` is closed.
    """
    pending = None
    pending_weight = None

    for raw in lines:
        text = raw.strip()
        if not text or text.startswith('['):
            continue

        record = json.loads(text)

        if pending is not None and record['weight'] == pending_weight:
            # Second half of a pair: possibly adopt its relation, then flush.
            same_args = (
                normalize(record['arg1']) == normalize(pending['arg1'])
                and normalize(record['arg2']) == normalize(pending['arg2'])
            )
            if same_args:
                pending['rel'] = record['rel']
            output_edge(pending, writer)
            pending = None
            pending_weight = None
            continue

        # Either nothing is pending or the weight changed: flush what we
        # have and start holding the new record.
        if pending is not None:
            output_edge(pending, writer)
        pending = record
        pending_weight = record['weight']
        record['surfaceRel'] = record['rel']

    if pending is not None:
        output_edge(pending, writer)

    writer.close()
Exemplo n.º 2
0
def handle_lines(lines, writer):
    """
    Turn a stream of JSON lines into edges written via `output_edge`.

    Lines that are empty after stripping, or that start with '[', are
    ignored. Each remaining line is decoded with `json.loads`. A single
    record is held until the following record is seen:

    - If the following record has an equal 'weight', the held record is
      written out and the following record is dropped. The held record's
      'rel' is first replaced by the following record's 'rel' when their
      normalized 'arg1' and 'arg2' values both match.
    - If the weight differs (or nothing is held), the held record is
      written out as-is and the new record is held instead, after its
      'rel' is copied into 'surfaceRel'.

    A record left over at the end is written out, then `writer` is closed.
    """
    held = None
    held_weight = None

    for raw_line in lines:
        stripped = raw_line.strip()
        if not stripped or stripped.startswith('['):
            continue

        edge = json.loads(stripped)

        # Only compare weights when there is a held record to pair with.
        if held is None:
            completes_pair = False
        else:
            completes_pair = edge['weight'] == held_weight

        if completes_pair:
            args_match = (
                normalize(edge['arg1']) == normalize(held['arg1'])
                and normalize(edge['arg2']) == normalize(held['arg2'])
            )
            if args_match:
                held['rel'] = edge['rel']
            output_edge(held, writer)
            held = None
            held_weight = None
        else:
            if held is not None:
                output_edge(held, writer)
            held = edge
            held_weight = edge['weight']
            edge['surfaceRel'] = edge['rel']

    if held is not None:
        output_edge(held, writer)

    writer.close()
Exemplo n.º 3
0
def make_concept_uri(text, lang, disambiguation=None):
    """
    Build a concept URI of the form ``/c/<lang>/<text>[/<disambiguation>]``.

    Parameters:
        text: the concept's text. It is passed through `ftfy.ftfy` and
            stripped of surrounding whitespace.
        lang: the language code placed in the URI. "en" text is normalized
            with `english.normalize`; every other language uses
            `preprocess_text(text).lower()`.
        disambiguation: optional disambiguation string. When omitted,
            `handle_disambig(text)` is called and is expected to return a
            `(text, disambiguation)` pair. A byte string is decoded as
            UTF-8.

    Returns:
        The URI string. Spaces in the text and in the disambiguation are
        replaced by underscores; the disambiguation is also stripped and
        lowercased. The disambiguation segment is omitted when it is None
        or empty.

    Raises:
        ValueError: if `lang` is "ja" and the disambiguation contains a
            parenthesized part but no "/" to split on.
    """
    text = ftfy.ftfy(text).strip()
    if disambiguation is None:
        text, disambiguation = handle_disambig(text)
    if disambiguation is not None:
        # Decode only byte strings. On Python 2 `bytes` is an alias of
        # `str`, so this is the same check as before; on Python 3 it avoids
        # calling .decode() on a text string, which has no such method.
        if isinstance(disambiguation, bytes):
            disambiguation = disambiguation.decode("utf-8")
        disambiguation = ftfy.ftfy(disambiguation)

    if lang == "en":
        normalized = english.normalize(text)
    else:
        if lang == "ja" and disambiguation is not None:
            match = re.search(r"\((.*?)\)", disambiguation)
            if match:
                parenthesized = match.group(1)
                # Only the part after the first slash is kept; the part of
                # speech in front of it is replaced by the lookup below.
                _, rest = disambiguation.split("/", 1)
                if parenthesized in JAPANESE_PARTS_OF_SPEECH:
                    pos = JAPANESE_PARTS_OF_SPEECH[parenthesized]
                else:
                    pos = "n"
                # Drop the parenthesized part (and whitespace around it)
                # from the remainder of the disambiguation.
                disambiguation = pos + "/" + re.sub(r"\s*\((.*?)\)\s*", "", rest)
        normalized = preprocess_text(text).lower()

    if disambiguation is not None:
        disambiguation = disambiguation.strip().replace(" ", "_").lower()
    if disambiguation:
        return "/c/%s/%s/%s" % (lang, normalized.replace(" ", "_"), disambiguation)
    else:
        return "/c/%s/%s" % (lang, normalized.replace(" ", "_"))
Exemplo n.º 4
0
def make_concept_uri(text, lang, disambiguation=None):
    """
    Build a concept URI of the form ``/c/<lang>/<text>[/<disambiguation>]``.

    Parameters:
        text: the concept's text. It is passed through `ftfy.ftfy`.
        lang: the language code placed in the URI. 'en' text is normalized
            with `english.normalize`; every other language uses
            `preprocess_text(text).lower()`.
        disambiguation: optional disambiguation string. When omitted,
            `handle_disambig(text)` is called and is expected to return a
            `(text, disambiguation)` pair. A byte string is decoded as
            UTF-8.

    Returns:
        The URI string. Spaces in the text and in the disambiguation are
        replaced by underscores. The disambiguation segment is omitted
        when it is None or empty.

    Raises:
        ValueError: if `lang` is 'ja' and the disambiguation contains a
            parenthesized part but no '/' to split on.
    """
    text = ftfy.ftfy(text)
    if disambiguation is None:
        text, disambiguation = handle_disambig(text)
    if disambiguation is not None:
        # Decode only byte strings. On Python 2 `bytes` is an alias of
        # `str`, so this is the same check as before; on Python 3 it avoids
        # calling .decode() on a text string, which has no such method.
        if isinstance(disambiguation, bytes):
            disambiguation = disambiguation.decode('utf-8')
        disambiguation = ftfy.ftfy(disambiguation)

    if lang == 'en':
        normalized = english.normalize(text)
    else:
        if lang == 'ja' and disambiguation is not None:
            match = re.search(r'\((.*?)\)', disambiguation)
            if match:
                parenthesized = match.group(1)
                # Only the part after the first slash is kept; the part of
                # speech in front of it is replaced by the lookup below.
                _, rest = disambiguation.split('/', 1)
                if parenthesized in JAPANESE_PARTS_OF_SPEECH:
                    pos = JAPANESE_PARTS_OF_SPEECH[parenthesized]
                else:
                    pos = 'n'
                # Drop the parenthesized part (and whitespace around it)
                # from the remainder of the disambiguation.
                disambiguation = pos + '/' + re.sub(r'\s*\((.*?)\)\s*', '', rest)
        normalized = preprocess_text(text).lower()

    if disambiguation is not None:
        disambiguation = disambiguation.replace(' ', '_')
    if disambiguation:
        return '/c/%s/%s/%s' % (lang, normalized.replace(' ', '_'), disambiguation)
    else:
        return '/c/%s/%s' % (lang, normalized.replace(' ', '_'))
Exemplo n.º 5
0
def search():
    """
    Redirect a submitted search form to the matching concept page.

    Reads the 'keyword' and 'language' fields from the request form,
    normalizes the keyword, and redirects to
    ``<web_route>concept/<language>/<keyword>``.
    """
    form = request.form
    raw_keyword = form.get('keyword')
    language = form.get('language')
    normalized_keyword = normalize(raw_keyword)
    destination = '%sconcept/%s/%s' % (web_route, language, normalized_keyword)
    return redirect(destination)