def handle_lines(lines,writer):
    # Stream JSON edge records (one per line), merging each pair of
    # consecutive records that share the same weight and the same
    # normalized arg1/arg2: the first record's 'rel' is kept as
    # 'surfaceRel' and the second record's 'rel' overwrites it before
    # the merged edge is emitted via output_edge().
    #
    # lines:  iterable of text lines; blank lines and lines starting
    #         with '[' are skipped.
    # writer: sink passed through to output_edge(); closed at the end.
    current_obj = None      # record waiting for its same-weight partner
    current_score = None    # weight of current_obj
    for line in lines:
        line = line.strip()
        if line and not line.startswith('['):
            obj = json.loads(line)
            if current_obj is None:
                # Start a new pending pair; remember the surface relation.
                current_obj = obj
                current_score = obj['weight']
                obj['surfaceRel'] = obj['rel']
            elif obj['weight'] == current_score:
                # Same weight as the pending record: merge only when both
                # arguments normalize to the same values.
                # NOTE(review): when the weights match but the arguments
                # do not, this record is silently dropped — presumably
                # intentional for this data format, but worth confirming.
                if normalize(obj['arg1']) == normalize(current_obj['arg1']) and normalize(obj['arg2']) == normalize(current_obj['arg2']):
                    current_obj['rel'] = obj['rel']
                    output_edge(current_obj,writer)
                    current_obj = None
                    current_score = None
            else:
                # Weight changed: flush the unpaired pending record and
                # start a new pending pair with this one.
                if current_obj is not None:
                    output_edge(current_obj,writer)
                current_obj = obj
                current_score = obj['weight']
                obj['surfaceRel'] = obj['rel']
    # Flush a trailing unpaired record, then close the sink.
    if current_obj is not None:
        output_edge(current_obj,writer)
    writer.close()
def handle_lines(lines, writer):
    """Stream JSON edge records and merge consecutive same-weight pairs.

    Each non-blank line (that does not start with '[') is parsed as a
    JSON object. Two consecutive records with equal 'weight' and equal
    normalized arg1/arg2 are merged: the first supplies 'surfaceRel',
    the second supplies the final 'rel', and the merged edge is written
    with output_edge(). Unpaired records are flushed as-is. The writer
    is closed when the input is exhausted.
    """
    pending = None         # record awaiting its same-weight partner
    pending_weight = None  # weight of the pending record
    for raw in lines:
        raw = raw.strip()
        # Guard clause: skip blanks and '['-prefixed lines.
        if not raw or raw.startswith('['):
            continue
        record = json.loads(raw)
        if pending is None:
            pending = record
            pending_weight = record['weight']
            record['surfaceRel'] = record['rel']
        elif record['weight'] == pending_weight:
            same_args = (
                normalize(record['arg1']) == normalize(pending['arg1'])
                and normalize(record['arg2']) == normalize(pending['arg2'])
            )
            if same_args:
                # Second half of the pair: take its relation and emit.
                pending['rel'] = record['rel']
                output_edge(pending, writer)
                pending = None
                pending_weight = None
        else:
            # New weight: flush whatever was pending and restart.
            if pending is not None:
                output_edge(pending, writer)
            pending = record
            pending_weight = record['weight']
            record['surfaceRel'] = record['rel']
    if pending is not None:
        output_edge(pending, writer)
    writer.close()
def make_concept_uri(text, lang, disambiguation=None):
    """Build a concept URI of the form /c/<lang>/<text>[/<disambiguation>].

    The text is mojibake-fixed with ftfy and whitespace-stripped. If no
    disambiguation is supplied, one may be extracted from the text via
    handle_disambig(). English text is normalized with english.normalize();
    Japanese disambiguations of the form "pos (…)/rest" are rewritten using
    JAPANESE_PARTS_OF_SPEECH; all other languages fall back to
    preprocess_text(...).lower(). Spaces become underscores throughout.
    """
    text = ftfy.ftfy(text).strip()
    if disambiguation is None:
        text, disambiguation = handle_disambig(text)
    if disambiguation is not None:
        # Bug fix: the original checked isinstance(disambiguation, str) and
        # then called .decode("utf-8"), which raises AttributeError on
        # Python 3 (str has no .decode). Checking bytes is correct on
        # Python 3 and identical on Python 2, where bytes is str.
        if isinstance(disambiguation, bytes):
            disambiguation = disambiguation.decode("utf-8")
        disambiguation = ftfy.ftfy(disambiguation)

    if lang == "en":
        normalized = english.normalize(text)
    elif lang == "ja" and disambiguation is not None:
        # Japanese disambiguations look like "… (<pos marker>) …/rest";
        # map the parenthesized marker to a part-of-speech code,
        # defaulting to noun ("n").
        match = re.search(r"\((.*?)\)", disambiguation)
        if match:
            parenthesized = match.group(1)
            pos, rest = disambiguation.split("/", 1)
            if parenthesized in JAPANESE_PARTS_OF_SPEECH:
                pos = JAPANESE_PARTS_OF_SPEECH[parenthesized]
            else:
                pos = "n"
            disambiguation = pos + "/" + re.sub(r"\s*\((.*?)\)\s*", "", rest)
        normalized = preprocess_text(text).lower()
    else:
        normalized = preprocess_text(text).lower()

    if disambiguation is not None:
        disambiguation = disambiguation.strip().replace(" ", "_").lower()

    if disambiguation:
        return "/c/%s/%s/%s" % (lang, normalized.replace(" ", "_"), disambiguation)
    else:
        return "/c/%s/%s" % (lang, normalized.replace(" ", "_"))
def make_concept_uri(text, lang, disambiguation=None):
    """Build a concept URI of the form /c/<lang>/<text>[/<disambiguation>].

    The text is mojibake-fixed with ftfy. If no disambiguation is supplied,
    one may be extracted from the text via handle_disambig(). English text
    is normalized with english.normalize(); Japanese disambiguations of the
    form "pos (…)/rest" are rewritten using JAPANESE_PARTS_OF_SPEECH; all
    other languages fall back to preprocess_text(...).lower(). Spaces
    become underscores in the output.
    """
    text = ftfy.ftfy(text)
    if disambiguation is None:
        text, disambiguation = handle_disambig(text)
    if disambiguation is not None:
        # Bug fix: the original checked isinstance(disambiguation, str) and
        # then called .decode('utf-8'), which raises AttributeError on
        # Python 3 (str has no .decode). Checking bytes is correct on
        # Python 3 and identical on Python 2, where bytes is str.
        if isinstance(disambiguation, bytes):
            disambiguation = disambiguation.decode('utf-8')
        disambiguation = ftfy.ftfy(disambiguation)

    if lang == 'en':
        normalized = english.normalize(text)
    elif lang == 'ja' and disambiguation is not None:
        # Japanese disambiguations look like "… (<pos marker>) …/rest";
        # map the parenthesized marker to a part-of-speech code,
        # defaulting to noun ('n').
        match = re.search(r'\((.*?)\)', disambiguation)
        if match:
            parenthesized = match.group(1)
            pos, rest = disambiguation.split('/', 1)
            if parenthesized in JAPANESE_PARTS_OF_SPEECH:
                pos = JAPANESE_PARTS_OF_SPEECH[parenthesized]
            else:
                pos = 'n'
            disambiguation = pos + '/' + re.sub(r'\s*\((.*?)\)\s*', '', rest)
        normalized = preprocess_text(text).lower()
    else:
        normalized = preprocess_text(text).lower()

    if disambiguation is not None:
        disambiguation = disambiguation.replace(' ', '_')

    if disambiguation:
        return '/c/%s/%s/%s' % (lang, normalized.replace(' ', '_'), disambiguation)
    else:
        return '/c/%s/%s' % (lang, normalized.replace(' ', '_'))
def search():
    """Handle the search form: normalize the keyword and redirect to its concept page."""
    lang = request.form.get('language')
    term = normalize(request.form.get('keyword'))
    return redirect('%sconcept/%s/%s' % (web_route, lang, term))