def _check_synonymous_relationship_4_0(paylist):
    """Check 4-0: broader terms must agree inside each preferred group.

    :param paylist: list of dict-like entries read via the keys
        'term' and 'broader_term'.
    :return: ``(list_of_group_names, 409)`` when at least one preferred
        group has entries whose 'broader_preferred_label' differ,
        otherwise ``(SuccessResponse, 200)``.
    """
    # Collect one broader-list record per entry.  An entry with a broader
    # term is only recorded when exactly one entry of paylist carries
    # that term.
    broader_entries = []
    for entry in paylist:
        broader_term = entry['broader_term']
        if broader_term is None:
            # In case the broader term does not exist
            _add_broader_list(broader_entries, entry)
            continue
        matches = [row for row in paylist if row['term'] == broader_term]
        if len(matches) == 1:
            _add_broader_list(broader_entries, entry, matches[0])

    # Compare every pair of records that belong to the same preferred
    # group and remember the groups whose broader preferred labels differ.
    conflicting_groups = []
    for left in broader_entries:
        for right in broader_entries:
            if left['preferred_group'] != right['preferred_group']:
                continue
            if left['broader_preferred_label'] !=\
               right['broader_preferred_label']:
                group = left['preferred_group']
                # Duplicate check
                # NOTE(review): the raw group value is compared with the
                # str() values stored below, so a non-string group would
                # never be recognised as already recorded.
                already_recorded = any(
                    seen == group for seen in conflicting_groups)
                if not already_recorded:
                    conflicting_groups.append(str(group))

    wk_error_term = list(conflicting_groups)
    if wk_error_term:
        return wk_error_term, 409

    return SuccessResponse('request is success.'), 200
def _insert_example_phrases(text_data):
    """Replace the stored example phrases with the content of text_data.

    The steps run inside one psycopg2 transaction: truncate the table,
    drop its index, insert the phrases and re-create the index.

    :param text_data: uploaded example-phrases data, passed unchanged to
        ``_simple_insert_example_phrases``.
    :return: ``(SuccessResponse, 200)``.
    :raises Exception: anything raised by psycopg2 or the helper
        functions propagates to the caller.
    """
    conn = psycopg2.connect(DB_CONNECT)
    try:
        # ``with conn`` commits on success and rolls back on error, but it
        # does NOT close the connection, hence the explicit close below.
        with conn:
            with conn.cursor() as cur:
                # DB Truncate Table
                _truncate_example_phrases(cur)
                # DROP INDEX
                _drop_example_phrases(conn, cur)
                # insert
                _simple_insert_example_phrases(conn, cur, text_data)
                # CREATE INDEX
                _create_index_example_phrases(conn, cur)
    finally:
        conn.close()

    return SuccessResponse('request is success.'), 200
def _copy_file_example_phrases(text_data):
    """Load example phrases into the database through a saved file.

    The upload is first handed to ``_file_copy``; the table is then
    truncated, its index dropped, the file loaded by
    ``_copy_example_phrases`` and the index re-created, all inside one
    psycopg2 transaction.  Finally ``_file_delete`` is called.

    :param text_data: uploaded file object; its ``filename`` attribute is
        passed to ``_copy_example_phrases``.
    :return: ``(SuccessResponse, 200)`` on success, otherwise
        ``(ErrorResponse, 400)`` with the message 'Save File Error.',
        'Copy File Error.' or 'Delete File Error.'.
    """
    if not _file_copy(text_data):
        return ErrorResponse(0, 'Save File Error.'), 400

    conn = None
    try:
        # Connecting stays inside the try block so that a failed
        # connection is reported as 'Copy File Error.' as well.
        conn = psycopg2.connect(DB_CONNECT)
        # ``with conn`` only ends the transaction (commit/rollback); the
        # connection itself is closed in the finally clause.
        with conn:
            with conn.cursor() as cur:
                # DB Truncate Table
                _truncate_example_phrases(cur)
                # DROP INDEX
                _drop_example_phrases(conn, cur)
                # DB file copy
                _copy_example_phrases(conn, cur, text_data.filename)
                # CREATE INDEX
                _create_index_example_phrases(conn, cur)
    except Exception as e:
        print(datetime.datetime.now(),
              '[_copy_file_example_phrases] Exception:', e, location())
        _file_delete(text_data)
        return ErrorResponse(0, 'Copy File Error.'), 400
    finally:
        if conn is not None:
            conn.close()

    if not _file_delete(text_data):
        return ErrorResponse(0, 'Delete File Error.'), 400

    return SuccessResponse('request is success.'), 200
def _exec_insert_postgrest(payload, url):
    """Replace the content of a PostgREST resource with payload.

    A DELETE request is sent to ``POSTGREST_BASE_URL + url`` first; the
    payload is then split with ``split_list(payload, SPLIT_COUNT)`` and
    every chunk is sent as JSON in its own POST request.

    :param payload: records to insert; a falsy payload is rejected.
    :param url: resource path appended to ``POSTGREST_BASE_URL``.
    :return: ``(SuccessResponse, 200)`` when every request succeeded,
        ``(ErrorResponse, 400)`` for a falsy payload, or
        ``(ErrorResponse, <HTTP status>)`` for the first failing request.
    """
    if not payload:
        print('[_exec_insert_postgrest] end(payload error)',
              location(), datetime.datetime.now())
        return ErrorResponse(0, 'Data Format Error.'), 400

    endpoint = POSTGREST_BASE_URL + url

    # Remove the existing rows before inserting the new ones.
    psg_res = requests.delete(endpoint)
    try:
        psg_res.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        print('[_exec_insert_postgrest] DELETE', url, 'error',
              psg_res.reason, location(), datetime.datetime.now())
        return ErrorResponse(0, psg_res.reason), psg_res.status_code

    # Insert chunk by chunk; stop at the first chunk that is rejected.
    chunks = list(split_list(payload, SPLIT_COUNT))
    for chunk_no, chunk in enumerate(chunks):
        psg_res = requests.post(endpoint,
                                headers=HEADER,
                                data=json.dumps(chunk))
        try:
            psg_res.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(e)
            print(datetime.datetime.now(),
                  '[_exec_insert_postgrest] POST', url, 'error', chunk_no,
                  '.', psg_res.reason, location())
            return ErrorResponse(0, psg_res.reason), psg_res.status_code

    return SuccessResponse('request is success.'), 200
def _check_trem_format_synonymous_relationship(payload_s):
    """Verify that every row of the frame has a value in column '用語名'.

    :param payload_s: object providing ``iterrows()`` (a DataFrame).
    :return: ``(ErrorResponse, 400)`` for the first row whose '用語名'
        value is null, otherwise ``(SuccessResponse, 200)``.
    """
    # An item that does not contain a key term is considered an error.
    for _, row in payload_s.iterrows():
        if not pd.notnull(row['用語名']):
            return ErrorResponse(0, 'Data Format Error.'), 400

    return SuccessResponse('request is success.'), 200
def _check_trem_format_reference_vocabulary(payload):
    """Verify that every payload entry has a value under the key 'term'.

    :param payload: iterable of dict-like entries.
    :return: ``(ErrorResponse, 400)`` for the first entry whose 'term'
        value is null, otherwise ``(SuccessResponse, 200)``.
    """
    # An item that does not contain a key term is considered an error.
    for entry in payload:
        if not pd.notnull(entry['term']):
            return ErrorResponse(0, 'Data Format Error.'), 400

    return SuccessResponse('request is success.'), 200
def _check_synonymous_relationship_3_0(paylist): # 3-0 Check the URI of preferred labels within and between preferred labels wk_error_term = [] for name in paylist: if name['group_uri'] != name['uri']: wk_error_group = name['preferred_group'] for name_er in paylist: if wk_error_group == name_er['preferred_group']: wk_error_term.append(name_er['term']) return wk_error_term, 409 return SuccessResponse('request is success.'), 200
def _check_synonymous_relationship_2_0(paylist): # 2-0 Check preferred labels within synonymous terms wk_error_term = [] for name in paylist: if name['preferred_group'] != name['preferred_label']: wk_error_group = name['preferred_group'] for name_er in paylist: if wk_error_group == name_er['preferred_group']: wk_error_term.append(name_er['term']) return wk_error_term, 409 return SuccessResponse('request is success.'), 200
def _check_columns(data_frame):
    """Verify that the frame contains every mandatory vocabulary column.

    The check is done once against ``data_frame.columns``.  Testing the
    labels of each row, as was done before, gives the same answer for
    every row and let a frame without rows pass even when columns were
    missing.

    :param data_frame: pandas DataFrame read from the uploaded file.
    :return: ``(ErrorResponse, 400)`` when at least one mandatory column
        is missing, otherwise ``(SuccessResponse, 200)``.
    """
    required_columns = (
        '用語名',
        '代表語',
        '代表語のURI',
        '上位語',
        '同義語候補',
        '上位語候補',
        '品詞',
        'x座標値',
        'y座標値',
        '色1',
        '色2',
    )
    available = data_frame.columns
    if any(column not in available for column in required_columns):
        return ErrorResponse(0, 'Data Format Error.'), 400

    return SuccessResponse('request is success.'), 200
def _check_synonymous_relationship_4_1(payload_s, paylist):
    """Check 4-1: follow the broader term of every entry.

    For each entry that has a broader term, ``_chk_broader_term`` is
    called with a list seeded with that term.

    :param payload_s: data passed unchanged to ``_chk_broader_term``.
    :param paylist: list of dict-like entries with the key
        'broader_term'.
    :return: ``(looplist, 409)`` for the first entry for which
        ``_chk_broader_term`` returns 2 and the list is non-empty,
        otherwise ``(SuccessResponse, 200)``.
    """
    for entry in paylist:
        broader_term = entry['broader_term']
        if broader_term is None:
            continue

        # In case the broader term exists
        visited = [broader_term]
        result = _chk_broader_term(payload_s, visited, broader_term)
        if result == 2 and len(visited) > 0:
            return visited, 409

    return SuccessResponse('request is success.'), 200
def _check_synonymous_relationship_3_1(preferredlist): # 3-1 Check the URI of preferred labels within and between preferred labels (in case the URI of the preferred label does not exist) wk_error_term = [] for name in preferredlist: wk_group_uri = name['uri'] if pd.notnull(name['uri']) else None wk_preferred =\ name['preferred_label']\ if pd.notnull(name['preferred_label']) else None for nameb in preferredlist: wk_group_uri1b =\ nameb['uri'] if pd.notnull(nameb['uri']) else None wk_preferred1b =\ nameb['preferred_label']\ if pd.notnull(nameb['preferred_label']) else None if wk_group_uri is not None and\ wk_group_uri == wk_group_uri1b and\ wk_preferred != wk_preferred1b: for name_er in preferredlist: if wk_group_uri == name_er['uri']: wk_error_term.append(name_er['term']) return wk_error_term, 409 return SuccessResponse('request is success.'), 200
def upload_file(editing_vocabulary=None, reference_vocabulary1=None, reference_vocabulary2=None, reference_vocabulary3=None, example_phrases=None):  # noqa: E501
    """Upload the files selected by the client to the server.

    Every argument is optional; only the files that were supplied are
    processed, in the order editing vocabulary, reference vocabularies
    1 to 3, example phrases.  Processing stops at the first failure.
    When 'editing_vocabulary' is uploaded, its columns and its synonymous
    relationships are checked before it is stored.

    :param editing_vocabulary: uploaded editing-vocabulary file
        (presumably a file-storage object supplied by the web framework
        - confirm against the API definition).
    :param reference_vocabulary1: uploaded file stored in
        'reference_vocabulary_1'.
    :param reference_vocabulary2: uploaded file stored in
        'reference_vocabulary_2'.
    :param reference_vocabulary3: uploaded file stored in
        'reference_vocabulary_3'.
    :param example_phrases: uploaded example-phrases file.

    :return: ``SuccessResponse`` when everything supplied was stored, or
        a ``(response, status_code)`` tuple describing the first failure.
    """
    if editing_vocabulary is not None:
        allow_extension, r_ext =\
            _check_extensions(editing_vocabulary,
                              VOCABULARY_ALLOWED_EXTENSIONS)
        if not allow_extension:
            print(datetime.datetime.now(),
                  '[Error] failed _check_extensions', location())
            return ErrorResponse(0, 'Data Format Error.'), 400

        # Check Synonymous Relationship
        df = _read_file_strage(editing_vocabulary, r_ext)

        # Check columns
        exec_res, status_code = _check_columns(df)
        if not status_code == 200:
            print(datetime.datetime.now(),
                  '[Error] failed _check_columns', location())
            return exec_res, status_code

        _repair_broader_term(df)

        exec_res, status_code = _check_synonymous_relationship(df)
        if not status_code == 200:
            print(datetime.datetime.now(),
                  '[Error] failed _check_synonymous_relationship',
                  location())
            return exec_res, status_code

        payload = _make_bulk_data_editing_vocabulary(df)

        exec_res, status_code =\
            _exec_insert_postgrest(payload, 'editing_vocabulary')
        if not status_code == 200:
            print(datetime.datetime.now(),
                  '[Error] failed _exec_insert_postgrest', location())
            return exec_res, status_code

    # The three reference vocabularies are handled identically; only the
    # uploaded file and the destination resource differ.
    reference_uploads = (
        (reference_vocabulary1, 'reference_vocabulary_1'),
        (reference_vocabulary2, 'reference_vocabulary_2'),
        (reference_vocabulary3, 'reference_vocabulary_3'),
    )
    for reference_file, resource in reference_uploads:
        if reference_file is None:
            continue

        allow_extension, r_ext =\
            _check_extensions(reference_file,
                              VOCABULARY_ALLOWED_EXTENSIONS)
        if not allow_extension:
            print(datetime.datetime.now(),
                  '[Error] failed _check_extensions', location())
            return ErrorResponse(0, 'Data Format Error.'), 400

        payload =\
            _make_bulk_data_reference_vocabulary(reference_file, r_ext)

        # format check
        exec_res, status_code =\
            _check_trem_format_reference_vocabulary(payload)
        if not status_code == 200:
            print(datetime.datetime.now(),
                  '[Error] failed _check_trem_format_reference_vocabulary',
                  location())
            return ErrorResponse(0, 'Data Format Error.'), 400

        exec_res, status_code = _exec_insert_postgrest(payload, resource)
        if not status_code == 200:
            print(datetime.datetime.now(),
                  '[Error] failed _exec_insert_postgrest', location())
            return exec_res, status_code

    if example_phrases is not None:
        allow_extension, r_ext =\
            _check_extensions(example_phrases, PHRASES_ALLOWED_EXTENSIONS)
        if not allow_extension:
            print(datetime.datetime.now(),
                  '[Error] failed _check_extensions', location())
            return ErrorResponse(0, 'Data Format Error.'), 400

        exec_res, status_code = _insert_example_phrases(example_phrases)
        if not status_code == 200:
            print(datetime.datetime.now(),
                  '[Error] failed _insert_example_phrases', location())
            _file_delete(example_phrases)
            return exec_res, status_code

    return SuccessResponse('request is success.')
def _check_synonymous_relationship(df):
    """Run the synonymous-relationship checks on an editing vocabulary.

    The frame is first validated with
    ``_check_trem_format_synonymous_relationship`` and sorted by the
    column '代表語'.  Two working lists are then built with the helper
    functions (``preferredlist`` and ``paylist``) and handed to the
    checks 2-0, 3-0, 3-1, 4-0 and 4-1, in that order.

    NOTE(review): the source formatting of this function was collapsed;
    the nesting of the two "Recursive call" sections inside the
    group-change branches is reconstructed - confirm against the
    repository history.

    :param df: pandas DataFrame of the editing vocabulary; the columns
        '代表語' and '代表語のURI' are read here.
    :return: ``(SuccessResponse, 200)`` when every check passes,
        ``(ErrorResponse, 400)`` when the format check fails, or
        ``(CheckErrorResponse, status_code)`` for the first failing
        check.
    """
    preferred_group = ''
    group_uri = ''
    paylist = []
    preferredlist = []

    # format check
    exec_res, status_code = _check_trem_format_synonymous_relationship(df)
    if not status_code == 200:
        print(datetime.datetime.now(),
              '[Error] _check_trem_format_synonymous_relationship failed ',
              location())
        return ErrorResponse(0, 'Data Format Error.'), 400

    # 1-1 Extraction of synonymous relationship
    # sort
    payload_s = df.sort_values('代表語')

    # Only the preferred labels to be a Key is picked up and a list is created.
    for index, item in payload_s.iterrows():
        wk_preferred = item['代表語'] if pd.notnull(item['代表語']) else None
        # Rows are sorted by preferred label, so a changed value marks the
        # first row of the next group.
        if preferred_group != wk_preferred:
            preferred_group = wk_preferred
            group_uri = item['代表語のURI'] if pd.notnull(
                item['代表語のURI']) else None
            # Recursive call
            looplist = []
            looplist.append(wk_preferred)
            ret_flg = _chk_preferred_group(payload_s, preferredlist,
                                           looplist, wk_preferred)
            # The row is added to preferredlist only when the helper
            # returns 1.
            if ret_flg == 1:
                _add_preferred_list(preferredlist, item)

    # Make group lists for every preferred label
    preferred_group = ''
    group_uri = ''
    for name in preferredlist:
        wk_preferred =\
            name['preferred_label']\
            if pd.notnull(name['preferred_label']) else None
        if preferred_group != wk_preferred:
            preferred_group = wk_preferred
            group_uri = name['uri'] if pd.notnull(name['uri']) else None
            # Recursive call
            looplist = []
            looplist.append(wk_preferred)
            # paylist is passed in and is the list the checks below read.
            _chk_preferred_list_group(payload_s, paylist, looplist,
                                      preferred_group, group_uri,
                                      wk_preferred)

    # Check Synonymous Relationship
    # Each check returns (result, status); the first non-200 status ends
    # the validation and its result is wrapped in a CheckErrorResponse.
    exec_res, status_code = _check_synonymous_relationship_2_0(paylist)
    if not status_code == 200:
        print(datetime.datetime.now(),
              '[Error] _check_synonymous_relationship_2_0 failed ',
              location())
        return CheckErrorResponse(2, exec_res, 0), status_code

    exec_res, status_code = _check_synonymous_relationship_3_0(paylist)
    if not status_code == 200:
        print(datetime.datetime.now(),
              '[Error] _check_synonymous_relationship_3_0 failed ',
              location())
        return CheckErrorResponse(3, exec_res, 1), status_code

    exec_res, status_code =\
        _check_synonymous_relationship_3_1(preferredlist)
    if not status_code == 200:
        print(datetime.datetime.now(),
              '[Error] _check_synonymous_relationship_3_1 failed ',
              location())
        return CheckErrorResponse(3, exec_res, 2), status_code

    exec_res, status_code = _check_synonymous_relationship_4_0(paylist)
    if not status_code == 200:
        print(datetime.datetime.now(),
              '[Error] _check_synonymous_relationship_4_0 failed ',
              location())
        return CheckErrorResponse(4, exec_res, 0), status_code

    exec_res, status_code =\
        _check_synonymous_relationship_4_1(payload_s, paylist)
    if not status_code == 200:
        print(datetime.datetime.now(),
              '[Error] _check_synonymous_relationship_4_1 failed ',
              location())
        return CheckErrorResponse(4, exec_res, 1), status_code

    return SuccessResponse('request is success.'), 200