def verify_auth(request):
    """
    Check a username/password pair against a fixed, in-code user table.

    Only AJAX GET requests are accepted. Credentials are read from the
    ``username`` and ``pwd`` query parameters.

    :param request: incoming HTTP request
    :return: result dict; code 1 (with the credentials echoed in the data
        field) on a match, code -1 with an explanatory message otherwise
    """
    result = ini_result()
    user_list = {"user": "******"}

    def reject(message):
        # Every failure path shares the same code and differs only in text.
        result[RESULT_CODE] = -1
        result[RESULT_MSG] = message
        return result

    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not (is_ajax and request.method == 'GET'):
        return reject("invalid request")

    user = request.GET.get("username", None)
    password = request.GET.get("pwd", None)
    if user is None or password is None:
        return reject("invalid request parameters")

    pwd_in_database = user_list.get(user, None)
    if pwd_in_database is None:
        return reject("no registered user")
    if pwd_in_database != password:
        return reject("unmatched password")

    result[RESULT_CODE] = 1
    result[RESULT_MSG] = "matched user and password"
    result[RESULT_DATA] = {"user": user, "pwd": password}
    return result
def parse_bib_file(file_name):
    """
    Parse a .bib file and return its entries.

    :param file_name: path of the bib file (must be a str)
    :return: result dict with code
        -901  invalid argument,
        -1100 the file could not be opened or parsed,
        1100  parsed; the data field holds ``bib_database.entries``,
        1101  the parser returned no database
    """
    result = ini_result()
    # isinstance() already rejects None, so a separate None test is not needed.
    if not isinstance(file_name, str):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    try:
        with open(file_name, encoding="utf-8") as bib_file:
            bib_database = bibtexparser.load(bib_file)
    except Exception:
        # Covers both I/O and parser errors. Deliberately not a bare
        # ``except`` so KeyboardInterrupt / SystemExit still propagate.
        result[RESULT_CODE] = -1100
        result[RESULT_MSG] = "文件打开失败:【" + file_name + "】"
        return result

    if bib_database is not None:
        result[RESULT_CODE] = 1100
        result[RESULT_MSG] = "成功解析bib文件【" + file_name + "】"
        result[RESULT_DATA] = bib_database.entries
    else:
        result[RESULT_CODE] = 1101
        result[RESULT_MSG] = "未从bib文件中解析出结果!【" + file_name + "】"
    return result
def generate_pub_node_from_file(driver, file_name, sheet_name, column_name):
    """
    Extract bibliography entries from a file and write them to neo4j.

    :param driver: database driver
    :param file_name: bib file or Excel file
    :param sheet_name: worksheet name (Excel only)
    :param column_name: column holding the bib text (Excel only)
    :return: result dict with code
        1200  success,
        -1200 extraction failed (message taken from the extractor),
        -1201 writing the nodes failed (message taken from the database step)
    """
    result = ini_result()
    extract_result = extract_bib_info_from_file(file_name, sheet_name, column_name)

    # 1008 is the code extract_bib_info_from_file uses for "entries found".
    if extract_result[RESULT_CODE] != 1008:
        print("不写入数据库:" + extract_result[RESULT_MSG])
        result[RESULT_CODE] = -1200
        result[RESULT_MSG] = extract_result[RESULT_MSG]
        return result

    extracted_data = extract_result[RESULT_DATA]
    # NOTE(review): create_or_match_nodes calls get_match_cypher() on each
    # item; confirm the extracted entries are model instances, not raw dicts.
    # The driver is the first positional argument of create_or_match_nodes.
    db_result = create_or_match_nodes(driver, extracted_data)
    if db_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = -1201
        # Report why the database step failed, not the extractor's message.
        result[RESULT_MSG] = db_result[RESULT_MSG]
    else:
        result[RESULT_CODE] = 1200
        result[RESULT_MSG] = "success"
    return result
def upload_bib_add_record(request):
    """
    Store each uploaded file on disk, extract its publications and write them
    to the database as Publication nodes.

    :param request: HTTP request carrying the uploads under the ``file`` key
    :return: ``wrap_result`` of a result dict with code
        -701 the files could not be read from the request,
        -702 the request contains no file,
        -201 at least one file was not written (names in the data field),
        200  every file was written
    """
    result = ini_result()
    try:
        files = request.FILES.getlist('file')
    except Exception:
        result[RESULT_MSG] = 'failed to retrieve file in the request'
        result[RESULT_CODE] = -701
        return wrap_result(result)
    if not files:
        result[RESULT_MSG] = 'No file in the request'
        result[RESULT_CODE] = -702
        return wrap_result(result)

    driver = initialize_neo4j_driver()
    upload_dir = os.path.join(os.path.dirname(__file__), 'upload_file')
    # Names are collected rather than file objects; presumably wrap_result
    # serialises the result, which an upload object would not survive.
    file_not_processed = []
    for file in files:
        today = str(datetime.date.today())
        filename = today + '_' + file.name  # date prefix, then original name
        file_path = save_file_stream_on_disk(file, upload_dir, filename)
        if file_path is None:
            file_not_processed.append(file.name)
            continue
        try:
            extract_result = extract_bib_info_from_file(file_path)
            # The extractor returns a result dict; the entries are in its data
            # field and only present when the code is 1008.
            entries = []
            if extract_result[RESULT_CODE] == 1008:
                entries = extract_result[RESULT_DATA] or []
            pubs = []
            for entry in entries:
                tmp_result_pub = extract_publication_from_bib_info(entry)
                if tmp_result_pub[RESULT_CODE] == 1001:
                    pubs.append(tmp_result_pub[RESULT_DATA])
            # An empty list is rejected by create_or_match_nodes, so a file
            # without usable entries is reported as not processed.
            db_pub_result = create_or_match_nodes(driver, pubs, return_type="class", to_create=True)
            if db_pub_result[RESULT_CODE] != 1303:
                file_not_processed.append(file.name)
        finally:
            # The on-disk copy is temporary; remove it even on errors.
            os.remove(file_path)

    if file_not_processed:
        result[RESULT_CODE] = -201
        result[RESULT_MSG] = "not all files are written into database"
        result[RESULT_DATA] = file_not_processed
    else:
        result[RESULT_CODE] = 200
        result[RESULT_MSG] = "success"
    return wrap_result(result)
def query_by_multiple_field(driver, node_info, node_type, page=None, limit=None, identifier="m"):
    """
    Generic lookup: search nodes of one type by one or more fields.

    :param driver: database driver
    :param node_info: dict of field conditions
    :param node_type: node label used in the MATCH clause
    :param page: 1-based page number; paging is applied only when both
        ``page`` and ``limit`` are given
    :param limit: page size
    :param identifier: variable name used for the node in the cypher
    :return: result dict with code
        -901 invalid arguments,
        -904 no condition could be generated,
        -910 the database call failed,
        904  success; the data field holds the processed records and
             ``count`` their number (0 when the processed result is None)
    """
    result = ini_result()
    if driver is None or node_info is None or not isinstance(node_info, dict) or \
            (page is not None and not isinstance(page, int)) or \
            (limit is not None and not isinstance(limit, int)):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    cond = generate_cypher_for_multi_field_condition(node_info, node_type, identifier)
    if cond is None:
        result[RESULT_CODE] = -904
        result[RESULT_MSG] = "No cypher has been generated"
        return result

    # Default to no paging so the query can still be built when page/limit
    # are omitted (the variable was previously left undefined in that case).
    paging = ""
    if page is not None and limit is not None:
        paging = " skip " + str((page - 1) * limit) + " limit " + str(limit)
    cypher = "match ({IF}:{NODE_TYPE}) ".format(
        IF=identifier, NODE_TYPE=node_type) + cond + " return " + identifier + " " + paging

    try:
        with driver.session() as session:
            records = session.run(cypher).data()
    except Exception:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result

    records = process_neo4j_result(records, identifier, flag=1)
    result[RESULT_MSG] = "success"
    result[RESULT_CODE] = 904
    result[RESULT_DATA] = records
    result["count"] = 0 if records is None else len(records)
    return result
def query_by_multiple_field_count(driver, node_info, node_type, identifier="m"):
    """
    Generic lookup: count the nodes that satisfy the given field conditions.

    :param driver: database driver
    :param node_info: dict of field conditions
    :param node_type: one of "PUBLICATION", "PERSON", "VENUE"
    :param identifier: variable name used for the node in the cypher
    :return: result dict; ``count`` is -1 on any failure. Codes:
        -901 invalid arguments,
        -904 no condition could be generated,
        -910 the database call failed,
        906  success
    """
    result = ini_result()
    result["count"] = -1
    if driver is None or node_info is None or not isinstance(node_info, dict) \
            or node_type is None or node_type not in ["PUBLICATION", "PERSON", "VENUE"]:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    cond = generate_cypher_for_multi_field_condition(node_info, node_type, identifier)
    if cond is None:
        result[RESULT_CODE] = -904
        result[RESULT_MSG] = "No cypher has been generated"
        return result

    # The returned column is named after the expression, so the lookup key
    # must follow the identifier instead of assuming the default "m".
    count_key = "count({IF})".format(IF=identifier)
    cypher = "match ({IF}:{NODE_TYPE}) ".format(
        IF=identifier, NODE_TYPE=node_type) + cond + " return " + count_key

    try:
        with driver.session() as session:
            records = session.run(cypher).data()
    except Exception:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result

    result[RESULT_MSG] = "success"
    result[RESULT_CODE] = 906
    if not records:
        result["count"] = 0
    elif len(records) > 1:
        result["count"] = len(records)
    else:
        result["count"] = records[0][count_key]
    return result
def add_relation(request):
    """
    Create a relation between two existing nodes from an AJAX POST request.

    The request body must be a JSON object with the keys ``sourceID``,
    ``sourceType``, ``targetID``, ``targetType`` and ``relType``.

    :param request: HTTP request
    :return: HTTP response carrying a result dict with code
        -10  not an AJAX POST request,
        -301 empty body,
        -1   body is not a JSON object,
        0    a required key is missing, or the database write failed,
        1    relation written
    """
    result = ini_result()

    def json_response():
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not (is_ajax and request.method == 'POST'):
        result[RESULT_CODE] = -10
        result[RESULT_MSG] = "not supported request form"
        return json_response()

    node_info = request.body
    # The body is bytes, so comparing it with "" never matches; test emptiness.
    if not node_info:
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no data is given"
        return wrap_result(result)

    try:
        node_info = json.loads(bytes.decode(node_info))
    except (json.JSONDecodeError, UnicodeDecodeError, TypeError):
        # A tuple is required here: ``except A or B`` only ever catches A.
        result[RESULT_CODE] = -1
        result[RESULT_MSG] = "given data is not a json string"
        return json_response()

    required = ("sourceID", "sourceType", "targetID", "targetType", "relType")
    if not isinstance(node_info, dict):
        result[RESULT_CODE] = -1
        result[RESULT_MSG] = "given data is not a json object"
        return json_response()
    missing = [key for key in required if key not in node_info]
    if missing:
        result[RESULT_CODE] = 0
        result[RESULT_MSG] = "missing parameters: " + ", ".join(missing)
        return json_response()

    driver = initialize_neo4j_driver()
    query_result = query_or_create_relation(
        driver, node_info["sourceType"], node_info["sourceID"],
        node_info["targetType"], node_info["targetID"], node_info["relType"])
    if query_result[RESULT_CODE] == 1306:
        result[RESULT_CODE] = 1
        result[RESULT_MSG] = "successfully write into database"
    else:
        result[RESULT_CODE] = 0
        result[RESULT_MSG] = "error when writing into database: " + query_result[RESULT_MSG]
    return json_response()
def read_from_excel(file, sheet_name, column_specified):
    """
    Read the rows of one worksheet from an uploaded Excel file.

    :param file: file-like object; its bytes are handed to xlrd
    :param sheet_name: name of the worksheet to read
    :param column_specified: list of header names to keep; any other value
        (including None) keeps every column
    :return: result dict with code
        -703 the workbook could not be opened,
        -704 the worksheet could not be read,
        -705 the header row could not be read,
        700  success; the data field is a list with one dict per data row,
             keyed by column name plus ``index`` (row number); empty cells
             are stored as None
    """
    result = ini_result()
    try:
        wb = xlrd.open_workbook(filename=None, file_contents=file.read())
    except xlrd.XLRDError:
        result["code"] = -703
        result[RESULT_MSG] = '打开Excel文件失败'
        return result

    try:
        sheet_content = wb.sheet_by_name(sheet_name)
    except (ValueError, xlrd.XLRDError):
        # xlrd reports an unknown sheet name as XLRDError.
        result["code"] = -704
        result[RESULT_MSG] = '读取Excel指定工作簿失败'
        return result

    try:
        sheet_title = sheet_content.row_values(0)
    except Exception:
        result["code"] = -705
        result[RESULT_MSG] = '读取Excel工作簿表头失败'
        return result

    # Map column index -> column name for the columns to extract.
    if column_specified is not None and isinstance(column_specified, list):
        correspondence = {}
        for column_name in column_specified:
            try:
                col_index = sheet_title.index(column_name)
            except ValueError:
                print("Excel文件中未包含<" + str(column_name) + ">列")
                continue
            correspondence[col_index] = column_name
    else:
        correspondence = dict(enumerate(sheet_title))

    content = []
    for row in range(1, sheet_content.nrows):  # row 0 is the header
        tmp = {"index": row}
        # .items() is required: iterating the dict itself yields only indices.
        for index, column_name in correspondence.items():
            value = sheet_content.cell_value(row, index)
            if value == "":
                print("第" + str(row) + "行文献没有" + str(column_name) + "信息【" +
                      str(sheet_content.row_values(row)) + "】")
                tmp[column_name] = None
            else:
                tmp[column_name] = value
        content.append(tmp)

    result["code"] = 700
    result[RESULT_MSG] = '读取Excel工作簿成功'
    result[RESULT_DATA] = content
    # Without this return the caller received None on success.
    return result
def extract_rel_author_by_from_pub_info(publications):
    """
    Build publication/author pairs from the ``author`` field of publications.

    :param publications: list of Publication
    :return: result dict with code
        -901 input is not a list of Publication,
        1009 every publication yielded authors,
        1010 some publications could not be processed.
        The data field is ``{"failed": ..., "success": ...}``; each entry is
        a list, or None when that list would be empty.
    """
    result = ini_result()
    is_valid = isinstance(publications, list) and all(
        isinstance(item, Publication) for item in publications)
    if not is_valid:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not valid"
        return result

    failed = []
    succeeded = []
    for pub in publications:
        parsed_names = process_author_str(pub.author)
        if parsed_names is None:
            failed.append(pub)
            continue
        for name in parsed_names:
            author = Person("", name["full_name"], name["first_name"],
                            name["middle_name"], name["last_name"])
            # NOTE(review): the person is stored under the key "venue";
            # confirm consumers really expect that key here.
            succeeded.append({"pub": pub, "venue": author, "index": name["index"]})

    # Empty lists are reported as None.
    failed = failed or None
    succeeded = succeeded or None

    if failed is None:
        result[RESULT_CODE] = 1009
        result[RESULT_MSG] = "success"
    else:
        result[RESULT_CODE] = 1010
        result[RESULT_MSG] = "partially filtered"
    result[RESULT_DATA] = {"failed": failed, "success": succeeded}
    return result
def search_publication_count(request):
    """
    Count the publications matching the search condition of an AJAX POST.

    :param request: HTTP request whose body is a JSON search condition
    :return: ``wrap_result`` of a result dict with a ``count`` entry. Codes:
        -103 not an AJAX POST request,
        -301 empty body,
        -603 body is not valid JSON,
        -601 body decodes to null,
        -604 the search condition could not be processed;
        otherwise the code, message and count of the database count query
    """
    result = ini_result()
    result[RESULT_COUNT] = -1
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not is_ajax or request.method != 'POST':
        result[RESULT_CODE] = -103
        result[RESULT_MSG] = "not support request method, should be post"
        result["count"] = 0
        return wrap_result(result)

    pub_info = request.body
    # The body is bytes, so comparing it with "" never matches; test emptiness.
    if not pub_info:
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no query condition is given"
        return wrap_result(result)

    try:
        pub_info = json.loads(bytes.decode(pub_info))
    except (json.JSONDecodeError, UnicodeDecodeError, TypeError):
        # A tuple is required here: ``except A or B`` only ever catches A.
        result[RESULT_CODE] = -603
        result[RESULT_MSG] = "query condition should be json string"
        return wrap_result(result)

    if pub_info is None:
        result[RESULT_CODE] = -601
        result[RESULT_MSG] = "no query condition is given"
        return wrap_result(result)

    # Convert the condition into the form the database layer accepts.
    parameters = process_search_condition(pub_info)
    if parameters is None:
        result[RESULT_CODE] = -604
        result[RESULT_MSG] = "搜索条件解析失败,请重试"
        result["count"] = 0
        return wrap_result(result)

    driver = initialize_neo4j_driver()
    query_result = utils.db_util.operations.query_by_multiple_field_count(
        driver, parameters, "PUBLICATION")
    result[RESULT_CODE] = query_result[RESULT_CODE]
    result[RESULT_MSG] = query_result.get(RESULT_MSG, "")
    result["count"] = query_result.get("count", -1)
    return wrap_result(result)
def extract_rel_publish_in_from_pub_info(publications):
    """
    Build publication/venue pairs from publications.

    The venue name is read from the publication attribute that
    FIELD_OF_PUBLICATION_FOR_VENUE maps to the publication's node type.

    :param publications: list of Publication
    :return: result dict with code
        -901 input is not a list of Publication,
        1009 every publication yielded a venue,
        1010 some publications have a node type without a venue field.
        The data field is ``{"failed": ..., "success": ...}``; ``failed``
        lists publication uuids; either entry is None when it would be empty.
    """
    result = ini_result()
    is_valid = isinstance(publications, list) and all(
        isinstance(item, Publication) for item in publications)
    if not is_valid:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not valid"
        return result

    skipped_uuids = []
    pairs = []
    for pub in publications:
        kind = pub.node_type
        if kind not in FIELD_OF_PUBLICATION_FOR_VENUE:
            skipped_uuids.append(pub.uuid)
            continue
        source_field = FIELD_OF_PUBLICATION_FOR_VENUE.get(kind)
        name = pub.__dict__[source_field]
        category = VENUE_TYPE_FOR_NODE_TYPE.get(kind)
        pairs.append({"pub": pub, "venue": Venue("", category, name)})

    # Empty lists are reported as None.
    skipped_uuids = skipped_uuids or None
    pairs = pairs or None

    if skipped_uuids is None:
        result[RESULT_CODE] = 1009
        result[RESULT_MSG] = "success"
    else:
        result[RESULT_CODE] = 1010
        result[RESULT_MSG] = "partially filtered"
    result[RESULT_DATA] = {"failed": skipped_uuids, "success": pairs}
    return result
def extract_bib_info_from_file(file_path, sheet_name=None, column_name=None):
    """
    Extract bibliography entries from a .bib, .xls or .xlsx file.

    :param file_path: path of the file (str)
    :param sheet_name: worksheet name, required for Excel files
    :param column_name: column holding the bib text, required for Excel files
    :return: result dict with code
        -901  file_path is not a str,
        -1004 the path has no extension,
        -707  Excel file without sheet or column name,
        -1005 unsupported extension,
        -1006 the parser reported a failure (its message is passed on),
        1007  parsed, but no entries were found,
        1008  success; the data field holds the parsed entries
    """
    result = ini_result()
    if not isinstance(file_path, str):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    # splitext returns "" (never None) when there is no extension; compare
    # case-insensitively so ".BIB" / ".XLSX" are accepted as well.
    ext = os.path.splitext(file_path)[-1].lower()
    if not ext:
        result[RESULT_CODE] = -1004
        result[RESULT_MSG] = "unable to extract he suffix of the file provided."
        return result

    if ext == '.bib':
        parse_result = parse_bib_file(file_path)
    elif ext in ('.xls', '.xlsx'):
        if sheet_name is None or column_name is None:
            result[RESULT_CODE] = -707
            result[RESULT_MSG] = "excel未指定sheet and head"
            return result
        parse_result = parse_bib_file_excel(file_path, sheet_name, column_name)
    else:
        result[RESULT_CODE] = -1005
        result[RESULT_MSG] = "无法处理的的文件类型!【" + file_path + "】"
        return result

    # Non-failure codes of the two parsers. 1102 is the Excel parser's
    # "every row parsed" code and must be accepted as well.
    if parse_result[RESULT_CODE] not in (1100, 1101, 1102, 1103, 1104):
        result[RESULT_CODE] = -1006
        result[RESULT_MSG] = parse_result[RESULT_MSG]
        return result

    # Some non-failure codes (e.g. all rows blank) carry no data at all.
    bib_data = parse_result.get(RESULT_DATA)
    if not bib_data:
        result[RESULT_CODE] = 1007
        result[RESULT_MSG] = "文件中没有有效的文献数据"
        return result

    result[RESULT_CODE] = 1008
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = bib_data
    return result
def revise_venue(request):
    """
    Update venue data from the JSON body of an AJAX POST request.

    :param request: HTTP request
    :return: HTTP response with a JSON result. Codes:
        -2   not an AJAX POST request,
        -301 empty body,
        -1   body is not valid JSON,
        1    written to the database,
        otherwise three times the flag returned by the database layer
    """
    result = ini_result()
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not (is_ajax and request.method == 'POST'):
        return HttpResponse(
            json.dumps({
                RESULT_MSG: "not supported request form",
                RESULT_CODE: -2
            }))

    node_info = request.body
    # The body is bytes, so comparing it with "" never matches; test emptiness.
    if not node_info:
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no data is given"
        return wrap_result(result)

    try:
        node_info = json.loads(bytes.decode(node_info))
    except (json.JSONDecodeError, UnicodeDecodeError, TypeError):
        # A tuple is required here: ``except A or B`` only ever catches A.
        return HttpResponse(
            json.dumps({
                RESULT_MSG: "given data is not a json string",
                RESULT_CODE: -1
            }))

    driver = initialize_neo4j_driver()
    flag = db_operation.revise_venues(driver, node_info)
    if flag == 1:
        return HttpResponse(
            json.dumps({
                RESULT_MSG: "successfully write into database",
                RESULT_CODE: 1
            }))
    return HttpResponse(
        json.dumps({
            RESULT_MSG: "error when writing into database",
            RESULT_CODE: flag * 3
        }))
def add_venue(request):
    """
    Create a Venue node from the JSON body of an AJAX POST request.

    :param request: HTTP request
    :return: HTTP response carrying a result dict with code
        -10  not an AJAX POST request,
        -301 empty body,
        -1   body is not valid JSON,
        0    the venue node could not be created,
        1    the venue node was created or matched
    """
    result = ini_result()
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not (is_ajax and request.method == 'POST'):
        result[RESULT_CODE] = -10
        result[RESULT_MSG] = "not supported request form"
        return wrap_result(result)

    node_info = request.body
    # The body is bytes, so comparing it with "" never matches; test emptiness.
    if not node_info:
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no data is given"
        return wrap_result(result)

    try:
        node_info = json.loads(bytes.decode(node_info))
    except (json.JSONDecodeError, UnicodeDecodeError, TypeError):
        # A tuple is required here: ``except A or B`` only ever catches A.
        result[RESULT_CODE] = -1
        result[RESULT_MSG] = "given data is not a json string"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    driver = initialize_neo4j_driver()
    # NOTE(review): the whole dict is passed as Venue's second positional
    # argument; confirm the model accepts that, or build the Venue from the
    # individual fields first.
    venue = [Venue("", node_info)]
    db_ven_result = create_or_match_nodes(driver, venue, return_type="class", to_create=True)
    if db_ven_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = 0
        result[RESULT_MSG] = "Venue节点生成失败"
    else:
        result[RESULT_CODE] = 1
        result[RESULT_MSG] = "Venue节点生成成功"
    return wrap_result(result)
def parse_excel_stations(request):
    """
    Read station data from the first Excel file uploaded with the request.

    Only the first uploaded file is read, using the module-level SHEET_NAME
    and SHEET_TITLE.

    :param request: HTTP request carrying the upload under the ``file`` key
    :return: ``wrap_result`` of either an error result (-701 the files could
        not be read from the request, -702 no file present) or whatever
        ``read_from_excel`` returns
    """
    failure = ini_result()
    try:
        uploads = request.FILES.getlist('file')
    except:
        failure[RESULT_MSG] = 'failed to retrieve file in the request'
        failure[RESULT_CODE] = -701
        return wrap_result(failure)

    if uploads is None or len(uploads) == 0:
        failure[RESULT_MSG] = 'No file in the request'
        failure[RESULT_CODE] = -702
        return wrap_result(failure)

    # todo support multiple files
    first_upload = uploads[0]
    return wrap_result(read_from_excel(first_upload, SHEET_NAME, SHEET_TITLE))
def extract_venue_from_bib_info(entry):
    """
    Build a Venue node from a dict of venue fields.

    Keys are upper-cased first; ``VENUE_NAME`` and ``VENUE_TYPE`` must be
    present and non-empty.

    :param entry: dict of venue fields
    :return: result dict; code -1007 when a required field is missing,
        code 1005 with the Venue in the data field otherwise
    """
    result = ini_result()
    entry = capitalize_dict_keys(entry)

    for required in ("VENUE_NAME", "VENUE_TYPE"):
        if required not in entry.keys() or len(entry[required]) == 0:
            result[RESULT_CODE] = -1007
            result[RESULT_MSG] = "缺少必填字段"
            return result

    # Fields missing from the entry are cleaned as empty strings.
    info = {}
    for field_name in FIELD_NAMES_VENUE:
        info[field_name] = process_special_character(entry.get(field_name, ""))

    venue_node = Venue(
        uuid="",
        venue_name=info["VENUE_NAME"],
        abbr=info["ABBR"],
        venue_type=info["VENUE_TYPE"],
        publisher=info["PUBLISHER"],
        year=info["YEAR"],
        address=info["ADDRESS"],
        sci_index=info["SCI_INDEX"],
        ei_index=info["EI_INDEX"],
        ssci_index=info["SSCI_INDEX"],
        note=info["NOTE"],
        start_year=info["START_YEAR"],
    )

    result[RESULT_CODE] = 1005
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = venue_node
    return result
def extract_person_from_bib_info(entry):
    """
    Build a Person node from a dict of person fields.

    Keys are upper-cased first; ``FULL_NAME`` must be present and non-empty.

    :param entry: dict of person fields
    :return: result dict; code -1008 when the required field is missing,
        code 1006 with the Person in the data field otherwise
    """
    result = ini_result()
    entry = capitalize_dict_keys(entry)
    # The mandatory field of a person is FULL_NAME. Checking VENUE_NAME here
    # raised KeyError for every entry that has no such key.
    if "FULL_NAME" not in entry.keys() or len(entry["FULL_NAME"]) == 0:
        result[RESULT_CODE] = -1008
        result[RESULT_MSG] = "缺少必填字段"
        return result

    # Fields missing from the entry are cleaned as empty strings.
    info = {
        field_name: process_special_character(entry.get(field_name, ""))
        for field_name in FIELD_NAMES_PERSON
    }
    node = Person(uuid="",
                  full_name=info["FULL_NAME"],
                  first_name=info["FIRST_NAME"],
                  middle_name=info["MIDDLE_NAME"],
                  last_name=info["LAST_NAME"],
                  name_ch=info["NAME_CN"],
                  first_name_ch=info["FIRST_NAME_CN"],
                  last_name_ch=info["LAST_NAME_CN"],
                  institution=info["INSTITUTION"],
                  research_interest=info["RESEARCH_INTEREST"],
                  note=info["NOTE"],
                  added_by=info["ADDED_BY"],
                  added_date=info["ADDED_DATE"])
    result[RESULT_CODE] = 1006
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = node
    return result
def query_or_create_relation(driver, source_type, source_id, target_type, target_id, relation_type, to_create=True, parameters=None):
    """
    Look up the relation between two nodes (matched by uuid) and optionally
    create it when it does not exist.

    todo: design the edge properties; currently only uuid and added_date.

    :param driver: database driver
    :param source_type: label of the source node, must be in NODE_TYPES
    :param source_id: uuid of the source node
    :param target_type: label of the target node, must be in NODE_TYPES
    :param target_id: uuid of the target node
    :param relation_type: edge type, must be in EDGE_TYPES (upper-cased in
        the query)
    :param to_create: create the relation when none is found
    :param parameters: dict of extra edge properties (stored as strings)
    :return: result dict with code
        -901  incomplete arguments, or parameters is not a dict,
        -1202 invalid node type,
        -1203 invalid edge type,
        -910  the database call failed,
        1304  relation already exists (data field holds it),
        -1305 the query/creation succeeded but the result could not be processed,
        1305  no relation found and creation not requested,
        1306  relation created (data field holds it),
        -1306 creation returned nothing
    """
    result = ini_result()
    required = (source_type, source_id, target_id, target_type, relation_type)
    if any(value is None for value in required):
        result["code"] = -901
        result["msg"] = "输入参数不完整"
        return result
    if source_type not in NODE_TYPES or target_type not in NODE_TYPES:
        result[RESULT_CODE] = -1202
        result[RESULT_MSG] = "node type is not valid"
        return result
    if relation_type not in EDGE_TYPES:
        result[RESULT_CODE] = -1203
        result[RESULT_MSG] = "edge type is not valid"
        return result
    if parameters is not None and not isinstance(parameters, dict):
        result["code"] = -901
        result["msg"] = "指定边属性不是dict,取消生成节点"
        return result

    # Labels and the relation type cannot be query parameters, but they were
    # validated against the allowed lists above. The ids are caller-supplied,
    # so they are passed as parameters, never spliced into the query text.
    id_params = {"IDs": str(source_id), "IDt": str(target_id)}
    cypher = ("MATCH (s:{source}) -[r:{rel}]-> (t:{target}) "
              "where s.uuid=$IDs and t.uuid=$IDt return r").format(
        source=source_type, target=target_type, rel=relation_type.upper())
    try:
        with driver.session() as session:
            # Fetch the records while the session is still open.
            edges = session.run(cypher, id_params).data()
    except Exception:
        result["code"] = -910
        result["msg"] = "数据库连接失败"
        return result

    if len(edges) > 0:
        tmp = process_neo4j_result(edges, "r", 1)
        if tmp is None:
            result["code"] = -1305
            result["msg"] = "边查询成功,但结果处理失败"
        else:
            result["code"] = 1304
            result["data"] = tmp
            result["msg"] = "{IDs}和{IDt}的{REL}关系已经存在".format(
                IDs=source_id, IDt=target_id, REL=relation_type)
        return result

    if not to_create:
        result["code"] = 1305
        result["msg"] = "数据库无记录,已选择不创建新节点"
        return result

    # Edge properties: generated uuid and timestamp first, then the
    # caller-supplied ones (as strings), passed as one map parameter.
    props = {
        "uuid": uuid.uuid1().hex,
        "added_date": time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
    }
    if parameters is not None:
        for key, value in parameters.items():
            props[str(key)] = str(value)

    cypher = ("MATCH (s:{source}), (t:{target}) where s.uuid=$IDs and t.uuid=$IDt "
              "CREATE (s) -[r:{REL} $props]->(t) return r").format(
        source=source_type, target=target_type, REL=relation_type.upper())
    create_params = dict(id_params, props=props)
    try:
        with driver.session() as session:
            edges = session.run(cypher, create_params).data()
    except Exception:
        result["code"] = -910
        result["msg"] = "数据库连接失败"
        return result

    if len(edges) > 0:
        tmp = process_neo4j_result(edges, "r", 2)
        if tmp is None:
            result["code"] = -1305
            result["msg"] = "边新建成功,但结果处理失败"
        else:
            result["code"] = 1306
            result["data"] = tmp
            result["msg"] = "{IDs}和{IDt}的{REL}关系创建成功".format(
                IDs=source_id, IDt=target_id, REL=relation_type)
    else:
        result["code"] = -1306
        result["msg"] = "{IDs}和{IDt}的{REL}关系创建关系失败,数据库操作失败".format(
            IDs=source_id, IDt=target_id, REL=relation_type)
    return result
def query_bib_node_by_multiple_field(driver, node_type, node_info, page=None, limit=None):
    """
    Query Publication / Venue / Person nodes by several fields and shape the
    result.

    :param driver: database driver
    :param node_type: one of "PUBLICATION", "PERSON", "VENUE"
    :param node_info: dict of field conditions
    :param page: page number, passed through to the generic query
    :param limit: page size, passed through to the generic query
    :return: result dict. Code -901 for invalid arguments; the generic
        query's code and message when it did not return 904; otherwise code
        905. For VENUE and PERSON the data field holds the raw records; for
        PUBLICATION it holds a list of dicts with null-normalised fields,
        ``pages`` split into ``pages1``/``pages2`` and ``indexing`` fixed to 0.
    """
    result = ini_result()
    if driver is None or node_info is None or not isinstance(node_info, dict) \
            or node_type not in ["PUBLICATION", "PERSON", "VENUE"]:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    identifier = "m"
    tmp = query_by_multiple_field(driver, node_info, node_type, page, limit, identifier)
    if tmp[RESULT_CODE] != 904:
        result[RESULT_CODE] = tmp[RESULT_CODE]
        result[RESULT_MSG] = tmp[RESULT_MSG]
        return result

    records = tmp[RESULT_DATA]
    result[RESULT_CODE] = 905
    result[RESULT_MSG] = "success"
    if node_type in ["VENUE", "PERSON"]:
        result[RESULT_DATA] = records
        return result

    # Text fields, grouped so the output keeps its established key order.
    fields_before_year = ("book_title", "how_published", "title", "author",
                          "editor", "keywords", "edition")
    fields_before_pages = ("month", "journal", "volume", "type", "chapter", "number")
    fields_after_pages = ("publisher", "organization", "institution", "school",
                          "address", "series")

    pubs = []
    # The generic query may report success with no records (None); treat
    # that as an empty list instead of failing on iteration.
    for record in records or []:
        page1, page2 = split_pages(record["pages"])
        pub = {"node_type": record["node_type"]}
        for field in fields_before_year:
            pub[field] = null_string(record[field])
        pub["year"] = null_int(record["year"])
        for field in fields_before_pages:
            pub[field] = null_string(record[field])
        pub["pages1"] = null_string(page1)
        pub["pages2"] = null_string(page2)
        for field in fields_after_pages:
            pub[field] = null_string(record[field])
        pub["indexing"] = 0
        for field in ("id", "uuid", "note"):
            pub[field] = null_string(record[field])
        pubs.append(pub)

    result[RESULT_DATA] = pubs
    return result
def create_or_match_nodes(driver, node_list, return_type="class", to_create=True):
    """
    Match each given node in the database, creating it when it is missing.

    :param driver: database driver
    :param node_list: non-empty list of Publication / Venue / Person objects
    :param return_type: "class" returns the input objects with their uuid
        filled in from the database; "dict" returns the database records
    :param to_create: create a node when no match is found
    :return: result dict with code
        -901  node_list is not a non-empty list,
        -500  no driver,
        -1300 invalid return_type,
        -910  an exception occurred while talking to the database,
        -1304 at least one node failed,
        1303  all nodes handled.
        For -1304 and 1303 the data field holds the handled nodes.
    """
    result = ini_result()
    if not isinstance(node_list, list) or len(node_list) == 0:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not of type list"
        return result
    if driver is None:
        result[RESULT_CODE] = -500
        result[RESULT_MSG] = "the database is not configured!"
        return result
    if return_type not in ('class', 'dict'):
        result[RESULT_CODE] = -1300
        result[RESULT_MSG] = "返回值类型指定错误!"
        return result

    failures = 0
    handled = []
    try:
        with driver.session() as session:
            for entry in node_list:
                outcome = query_or_create_node(
                    session, entry, to_create=to_create, match_field=None)
                if outcome[RESULT_CODE] not in [1301, 1302]:
                    print(outcome[RESULT_MSG])
                    failures += 1
                    continue
                first_record = outcome[RESULT_DATA][0]  # todo: handle several matches
                if return_type == 'class':
                    entry.uuid = first_record["uuid"]
                    handled.append(entry)
                else:
                    handled.append(first_record)
    except:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result

    if failures:
        result[RESULT_CODE] = -1304
        result[RESULT_MSG] = "查询/新建节点出现错误"
    else:
        result[RESULT_CODE] = 1303
        result[RESULT_MSG] = "成功"
    result[RESULT_DATA] = handled
    return result
def query_or_create_node(tx, node, to_create=True, match_field=None):
    """
    Look a node up in the database and optionally create it when missing.

    :param tx: neo4j session/transaction offering ``run``
    :param node: model object providing ``get_match_cypher`` and
        ``get_create_cypher``
    :param to_create: create the node when no match is found
    :param match_field: dict of match conditions; None uses the node's
        default match query (todo: custom conditions not implemented yet)
    :return: result dict with code
        -901  missing tx/node, or match_field is not a dict,
        -910  the database call failed,
        1301  match found (data field holds the processed records),
        1300  no match and creation not requested,
        1302  node created (data field holds the processed records),
        -1301 the records could not be processed,
        -1302 creation returned more than one record,
        -1303 creation returned nothing
    """
    result = ini_result()
    if tx is None or node is None:
        result["code"] = -901
        result["msg"] = "未提供数据查询/新建节点或未提供数据库信息"
        return result

    if match_field is None:
        cypher = node.get_match_cypher()  # default mandatory fields
    elif not isinstance(match_field, dict):
        result["code"] = -901
        result["msg"] = "提供查询的条件格式错误"
        return result
    else:
        cypher = node.get_match_cypher(match_field)

    try:
        nodes = tx.run(cypher).data()
    except Exception:
        result["code"] = -910
        result["msg"] = "数据库连接错误"
        return result

    if nodes:
        tmp = process_neo4j_result(nodes, "node", 1)
        if tmp is not None:
            result["data"] = tmp
            result["code"] = 1301
            # Callers index and len() the processed result as a sequence,
            # so count its items directly.
            result["msg"] = "查询成功,有" + str(len(tmp)) + "个匹配节点:"
        else:
            result["code"] = -1301
            result["msg"] = "异常!"
        return result

    if not to_create:
        result["code"] = 1300
        result["msg"] = "查询成功,无记录,且不创建新节点"
        return result

    node.uuid = uuid.uuid1()
    node.added_date = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    create_cypher = node.get_create_cypher()
    try:
        # Materialise the records; the raw result object supports neither
        # len() nor the record processing below.
        nodes = tx.run(create_cypher).data()
    except Exception:
        result["code"] = -910
        result["msg"] = "查询无记录,创建过程中数据库连接失败!"
        return result

    if not nodes:
        result["code"] = -1303
        result["msg"] = "创建失败!"
        return result
    if len(nodes) > 1:
        result["code"] = -1302
        result["msg"] = "异常!"
        return result

    tmp = process_neo4j_result(nodes, "node", 2)
    if tmp is not None:
        result["data"] = tmp
        # 1302 is the "created" code that create_or_match_nodes accepts.
        result["code"] = 1302
        result["msg"] = "查询无记录,创建成功!"
    else:
        result["code"] = -1301
        result["msg"] = "异常!"
    return result
def parse_bib_file_excel(file_name, sheet_name, column_name):
    """
    Parse the bib text stored in one column of an Excel worksheet.

    Each non-empty cell of the column is parsed as a bib string and its first
    entry is collected. The entries are not processed further here.

    :param file_name: path of the Excel file (str)
    :param sheet_name: worksheet to read (str), e.g. "deep learning"
    :param column_name: header of the column holding the bib text, e.g. "bib"
    :return: result dict with code
        -901  invalid arguments,
        -703  the workbook could not be opened,
        -704  the worksheet could not be read,
        -705  the header row could not be read,
        -706  the column is not in the header,
        1103  every row is blank (no data),
        1102  every non-blank row parsed (data field holds the entries),
        1104  some non-blank rows parsed (data field holds the entries),
        -1101 no non-blank row parsed,
        -1102 unexpected counter state
    """
    result = ini_result()
    if not isinstance(file_name, str) or not isinstance(sheet_name, str) \
            or not isinstance(column_name, str):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    try:
        wb = xlrd.open_workbook(filename=file_name)
    except Exception:
        result[RESULT_CODE] = -703
        result[RESULT_MSG] = '打开Excel文件失败'
        return result
    try:
        sheet_content = wb.sheet_by_name(sheet_name)
    except Exception:
        result[RESULT_CODE] = -704
        result[RESULT_MSG] = '读取Excel指定工作簿失败'
        return result
    try:
        sheet_title = sheet_content.row_values(0)
    except Exception:
        result[RESULT_CODE] = -705
        result[RESULT_MSG] = '读取Excel工作簿表头失败'
        return result
    try:
        col_index = sheet_title.index(column_name)
    except ValueError:
        result[RESULT_CODE] = -706
        result[RESULT_MSG] = '指定的工作表中没有' + column_name + '列'
        return result

    content = []
    counter_bib = 0                        # rows parsed successfully
    counter_all = sheet_content.nrows - 1  # data rows (header excluded)
    counter_null = 0                       # blank rows
    for index in range(1, sheet_content.nrows):
        value = sheet_content.cell_value(index, col_index)
        row_text = str(sheet_content.row_values(index))
        if value == "":
            print("第" + str(index) + "行文献没有bib信息【" + row_text + "】")
            counter_null += 1
            continue
        try:
            tmp = bibtexparser.loads(value)
        except Exception:
            # Internal error for this row only; no error code is returned.
            print("第" + str(index) + "行文献解析bib过程失败【" + row_text + "】")
            continue
        # A parse that yields no entry counts as a failed row.
        if tmp is not None and tmp.entries:
            content.append(tmp.entries[0])
            counter_bib += 1
        else:
            print("第" + str(index) + "行文献解析bib过程失败2【" + row_text + "】")

    prefix = "【总结】Excel文件<" + file_name + ">的<" + sheet_name + ">页<" + column_name
    counter_non_null = counter_all - counter_null

    # The all-blank case is checked first; otherwise "0 parsed of 0 non-blank
    # rows" would be reported as a full success.
    if counter_null == counter_all:
        result[RESULT_MSG] = prefix + ">列全部为空白"
        result[RESULT_CODE] = 1103
        return result
    if counter_bib == counter_non_null:
        result[RESULT_MSG] = prefix + ">列全部信息(除空白行)均有效解析,共" + str(counter_all) + \
            "行," + str(counter_null) + "空白行," + str(counter_bib) + "成功解析"
        result[RESULT_CODE] = 1102
        result[RESULT_DATA] = content
        return result
    if counter_non_null > counter_bib > 0:
        result[RESULT_MSG] = prefix + ">列全部信息(除空白行)部分有效解析,共" + str(counter_all) + \
            "行," + str(counter_null) + "空白行," + str(counter_bib) + "成功解析"
        result[RESULT_CODE] = 1104
        result[RESULT_DATA] = content
        return result
    if counter_all > counter_null and counter_bib == 0:
        result[RESULT_MSG] = prefix + ">列(除空白外)全部解析失败"
        result[RESULT_CODE] = -1101
        return result

    # Defensive fallback; the cases above are expected to be exhaustive.
    result[RESULT_MSG] = prefix + ">列。结果未考虑!"
    result[RESULT_CODE] = -1102
    return result
def query_person_pub_venue_by_person_name(driver, person_list, skip=None, limit=None):
    """
    Collect the Person -> Publication -> Venue graph for the given person names.

    The data is meant for drawing a relation graph: every node is returned once
    as ``{"id", "label", "type"}`` and every edge as ``{"from", "to"}``.

    :param driver: neo4j driver (must provide ``session()``)
    :param person_list: list of person names
    :param skip: optional number of Person/Publication rows to skip; paging is
        only applied when ``skip`` is given
    :param limit: optional maximum number of rows, used together with ``skip``
    :return: result dict. Codes: -901 invalid arguments, 901 no matching data,
        -902 the detail query returned nothing,
        900 success (data = {"relation": [...], "nodes": [...]})
    """
    result = ini_result()
    if driver is None or not isinstance(person_list, list):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    # Paging values are coerced to int so they can be embedded safely; a
    # missing limit no longer produces the invalid clause "limit None".
    paging = ""
    if skip is not None:
        try:
            paging = " skip " + str(int(skip))
            if limit is not None:
                paging += " limit " + str(int(limit))
        except (TypeError, ValueError):
            result[RESULT_CODE] = -901
            result[RESULT_MSG] = "invalid arguments"
            return result

    # Check whether there is any data at all. Names are passed as a query
    # parameter instead of being pasted into the Cypher text.
    cypher_total = "match (n:Person)-[r:Write]->(m:Publication) where n.name in $names return count(m)"
    with driver.session() as session:
        total = session.run(cypher_total, names=person_list).value()[0]
    if total == 0:  # no data found
        result[RESULT_CODE] = 901
        result[RESULT_MSG] = "no matched data"
        return result

    nodes_by_id = {}  # uuid -> node dict; insertion order gives the output order
    relations = []

    def _remember(node_id, label, node_type):
        # register a node once, keyed by its uuid
        if node_id not in nodes_by_id:
            nodes_by_id[node_id] = {"id": node_id, "label": label, "type": node_type}

    # get details of matched Publication and Person
    cypher = "match (m:Person)-[r:Write]->(n:Publication) where m.name in $names " \
             "return n, m order by n.year" + paging
    with driver.session() as session:
        for record in session.run(cypher, names=person_list):
            print("查询person-->publication结果成功")
            person, pub = record["m"], record["n"]
            _remember(person["uuid"], person["name"], "person")
            _remember(pub["uuid"], pub["title"], "publication")
            relations.append({"from": person["uuid"], "to": pub["uuid"]})

    if not nodes_by_id:
        result[RESULT_CODE] = -902
        result[RESULT_MSG] = "failed to get matched data (Publication<-Write<-Person)"
        return result

    # get details of matched Publication and Venue
    cypher = "match (m:Publication)-[r:PUBLISH_IN]->(n:Venue) where m.uuid in $ids return n, m"
    known_ids = list(nodes_by_id)
    with driver.session() as session:
        for record in session.run(cypher, ids=known_ids):
            print("查询publication-->venue结果成功")
            pub, venue = record["m"], record["n"]
            _remember(pub["uuid"], pub["title"], "publication")
            _remember(venue["uuid"], venue["venue_name"], "venue")
            relations.append({"from": pub["uuid"], "to": venue["uuid"]})

    result[RESULT_CODE] = 900
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = {"relation": relations, "nodes": list(nodes_by_id.values())}
    return result
def query_one_pub_by_uuid(driver, pub_id):
    """
    Fetch the details of one Publication, together with its authors, by uuid.

    TODO: when several publications match, only the first one is used.

    :param driver: neo4j driver (must provide ``session()``)
    :param pub_id: str, uuid of the publication
    :return: result dict. Codes: -901 invalid arguments, 902 no matching data,
        903 success (data = dict of form fields describing the publication)
    """
    result = ini_result()
    if driver is None or not isinstance(pub_id, str):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    # The uuid is passed as a query parameter rather than concatenated into the
    # Cypher text, so quotes inside pub_id cannot alter the query.
    cypher = "match (m:Publication {uuid: $pub_id}) <-[r:Write]- (n:Person) return m, n"
    with driver.session() as session:
        records = session.run(cypher, pub_id=pub_id).data()

    if not records:
        result[RESULT_MSG] = "no matched data"
        result[RESULT_CODE] = 902
        return result

    pub_node = records[0]["m"]  # every row carries the same publication
    # "pages" is stored as one value and shown as two form fields
    page1, page2 = split_pages(pub_node["pages"])
    # one row per author
    author_names = [item["n"]["name"] for item in records]
    authors = [split_name(name, author_names) for name in author_names]

    pub = {
        "paperTypeEdit": pub_node["node_type"],
        "title": null_string(pub_node["title"]),
        "booktitle": null_string(pub_node["book_title"]),
        "author": authors,
        "editor": null_string(pub_node["editor"]),
        "keywords": null_string(pub_node["keywords"]),
        "edition": null_string(pub_node["edition"]),
        "year": null_string(pub_node["year"]),
        "month": null_string(pub_node["month"]),
        "journal": null_string(pub_node["journal"]),
        "volume": null_string(pub_node["volume"]),
        "type": null_string(pub_node["type"]),
        "chapter": null_string(pub_node["chapter"]),
        "number": null_string(pub_node["number"]),
        "pages1": null_string(page1),
        "pages2": null_string(page2),
        "publisher": null_string(pub_node["publisher"]),
        "organization": null_string(pub_node["organization"]),
        "institution": null_string(pub_node["institution"]),
        "school": null_string(pub_node["school"]),
        "address": null_string(pub_node["address"]),
        "series": null_string(pub_node["series"]),
        "howpublished": null_string(pub_node["how_published"]),
        "indexing": 0,
        "note": null_string(pub_node["note"]),
    }
    result[RESULT_MSG] = "success"
    result[RESULT_CODE] = 903
    result[RESULT_DATA] = pub
    return result
def build_relation_from_node_attribute(driver, source_node_type="PUBLICATION", target_node_type="VENUE",
                                       rel_type="PUBLISHED_IN", filters={"node_type": "ARTICLE"},
                                       info_field='JOURNAL', use_source=1, do_split=False):
    """
    Derive new nodes and edges from one attribute of existing nodes.

    Nodes of one end of the relation are queried, the value of ``info_field``
    is read from each of them, nodes of the other end are created (or matched)
    from those values, and finally the edges between both are created (or
    matched). Splitting a field that holds several values is only supported
    when the new nodes are persons.

    :param driver: neo4j driver
    :param source_node_type: node type at the start of the edge
    :param target_node_type: node type at the end of the edge
    :param rel_type: edge type
    :param filters: dict or None; attribute name -> required (string) value used
        to filter the queried nodes. The default dict is never modified here.
    :param info_field: name of the attribute to analyse
    :param use_source: truthy: query the source nodes and create target nodes;
        falsy: query the target nodes and create source nodes
    :param do_split: whether the field value should be split into several nodes
    :return: result dict. Codes: -500 no driver, -1202 invalid node type,
        -1203 invalid edge type, -1204 invalid field, -901 invalid arguments,
        -910 database query failed, 125 no node matched the filters,
        126 no matched node has a usable field value,
        -128 creating/matching the new nodes failed,
        -131 some edges could not be created, 127 all edges created
    """
    result = ini_result()
    if driver is None:
        result[RESULT_CODE] = -500
        result[RESULT_MSG] = "driver is not given"
        return result
    if source_node_type not in NODE_TYPES or target_node_type not in NODE_TYPES:
        result[RESULT_CODE] = -1202
        result[RESULT_MSG] = "node type is not valid"
        return result
    if rel_type not in EDGE_TYPES:
        result[RESULT_CODE] = -1203
        result[RESULT_MSG] = "edge type is not valid"
        return result
    if info_field not in FIELD_NAMES_PUB:
        result[RESULT_CODE] = -1204
        result[RESULT_MSG] = "field is not valid"
        return result
    if (filters is not None and not isinstance(filters, dict)) or \
            (filters is not None and len(set(filters.keys()) & set(FIELD_NAMES_PUB)) == 0) or \
            use_source is None or do_split is None:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    identifier = 'node'
    # The queried end is the existing one; the other end is the one to create.
    if use_source:
        queried_type, new_type = source_node_type, target_node_type
        own_key, new_key = "source", "target"
    else:
        queried_type, new_type = target_node_type, source_node_type
        own_key, new_key = "target", "source"

    # build the query, with a where-clause when filters are given
    if filters is not None:
        filter_str = " and ".join(identifier + "." + key + "='" + value + "'"
                                  for key, value in filters.items())
        cypher = "match ({IF}:{NODE}) where {FILTER} return {IF}".format(IF=identifier, NODE=queried_type,
                                                                         FILTER=filter_str)
    else:
        cypher = "match ({IF}:{NODE}) return {IF}".format(IF=identifier, NODE=queried_type)

    # Fetch all records while the session is still open; a result must not be
    # iterated after its session has been closed.
    try:
        with driver.session() as session:
            records = list(session.run(cypher))
    except Exception:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result

    # Each pair holds the uuid of the queried node under own_key, the field
    # value (later replaced by the new node's uuid) under new_key, and optional
    # edge attributes under "parameter".
    data_pair = []
    counter_has_content = 0
    counter_all = 0
    # TODO: it is not checked whether every record with content was processed successfully
    for record in records:
        record_id = record[identifier][FIELD_NAMES_PUB[31]]  # uuid
        print("提取{NODE}与{NODE2}之间关系{REL}过程:查询到节点:".format(NODE=source_node_type, NODE2=target_node_type,
                                                           REL=rel_type) + str(record_id))
        counter_all += 1
        record_field = record[identifier][info_field]
        if not string_util(record_field):
            print("{ID} has empty {FIELD} field".format(ID=record_id, FIELD=info_field))
            continue
        if do_split and new_type.upper() == NODE_TYPES[2]:  # person: one pair per name
            # process_person_names returns a dict keyed by the original string;
            # the value is a list of {name, index} dicts
            names = process_person_names([record_field])[record_field]
            for name in names:
                data_pair.append({own_key: record_id, new_key: name["name"],
                                  "parameter": {"index": name["index"]}})
        else:
            # "parameter" is always present so that edge creation can read it
            data_pair.append({own_key: record_id, new_key: record_field, "parameter": None})
            if do_split:
                result[RESULT_MSG] += "暂不支持针对【" + new_type + "】的拆分"
                print("暂不支持针对【" + new_type + "】的拆分")
        counter_has_content += 1

    if counter_all == 0:
        result[RESULT_CODE] = 125
        result[RESULT_MSG] += "\n 提取{NODE}与{NODE2}之间关系{REL}过程:未查询到{NODE_Q}节点中满足条件{FILTER}的节点".format(
            NODE=source_node_type, FILTER=str(filters), NODE2=target_node_type, REL=rel_type, NODE_Q=new_type)
        print(result[RESULT_MSG])
        return result
    if counter_has_content == 0:
        result[RESULT_CODE] = 126
        result[RESULT_MSG] += "\n 提取{NODE}与{NODE2}之间关系{REL}过程:在满足条件{FILTER}的{NODE_Q}节点的字段{FIELD}中没有有效信息".format(
            NODE=source_node_type, FILTER=str(filters), NODE2=target_node_type, REL=rel_type,
            FIELD=info_field, NODE_Q=new_type)
        print(result[RESULT_MSG])
        return result

    # distinct values from which the new nodes are built
    data_switched = list({pair[new_key] for pair in data_pair})
    info = {"name": data_switched, "type": new_type}
    models = wrap_info_to_model(info, filters)  # wrapped as model instances
    # NOTE(review): database_info is not defined in this function; presumably a
    # module-level object — confirm it exists and matches the helper's signature.
    create_result = create_or_match_nodes(models, database_info, to_create=True, return_type="class")
    if create_result[RESULT_CODE] < 0:
        result[RESULT_CODE] = -128
        result[RESULT_MSG] = create_result[RESULT_MSG] + "\t 查询/新建节点信息失败,停止创建关系"
        return result

    # map the keyword of every created/matched node to its uuid
    mapping = {}
    for datum in create_result[RESULT_DATA]:
        if new_type == NODE_TYPES[0]:  # Publication
            mapping[datum.id] = datum.uuid
        elif new_type == NODE_TYPES[1]:  # Venue
            mapping[datum.venue_name] = datum.uuid
        elif new_type == NODE_TYPES[2]:  # Person
            mapping[datum.full_name] = datum.uuid

    # Replace keywords by uuids. A pair whose keyword has no node is dropped
    # here and therefore counted as a failed edge below, instead of raising.
    resolved_pairs = []
    for pair in data_pair:
        node_uuid = mapping.get(pair[new_key])
        if node_uuid is None:
            continue
        pair[new_key] = node_uuid
        resolved_pairs.append(pair)

    # create/match the edges
    counter_created = 0
    with driver.session() as session:
        for pair in resolved_pairs:
            rel_result = query_or_create_relation(session, source_node_type, pair["source"], target_node_type,
                                                  pair["target"], rel_type, to_create=True,
                                                  parameters=pair["parameter"])
            if rel_result[RESULT_CODE] in (170, 171):
                counter_created += 1

    if counter_created < len(data_pair):
        result[RESULT_CODE] = -131
        result[RESULT_MSG] = "部分边创建失败"
    else:
        result[RESULT_CODE] = 127
        result[RESULT_MSG] = "全部边创建成功"
    return result
def resolve_deprel(request):
    """
    Parse the dependency relations of a sentence (triggered by the "start
    parsing" button) and return the parse together with the rendered graph.

    The request must be an ajax POST, i.e. sent with the headers
    {"X-Requested-With": "XMLHttpRequest",
     "Content-Type": "application/x-www-form-urlencoded"}, and its body must be
    a JSON object containing "sentence".

    :param request: request whose body carries the sentence to parse
    :return: wrapped result. Codes: -103 not an ajax POST, -301 empty body,
        -302 body is not usable, -303 graph generation failed, 300 success
    """
    result = ini_result()
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not is_ajax or request.method != 'POST':
        result[RESULT_CODE] = -103
        result[RESULT_MSG] = "not supported request form (should be post and with ajax)"
        return wrap_result(result)

    data = request.body
    # The body is bytes, so comparing it with "" never matched an empty body.
    if not data:
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no data is given"
        return wrap_result(result)

    # Undecodable bytes and malformed JSON raise ValueError subclasses, and a
    # missing key raises KeyError; none of them is a TypeError.
    try:
        payload = json.loads(bytes.decode(data))
        sentence = payload["sentence"]
    except (TypeError, ValueError, KeyError):
        result[RESULT_CODE] = -302
        result[RESULT_MSG] = "no valid data is given"
        return wrap_result(result)

    try:
        # graph file name: sentence + timestamp
        resolve_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        words, head, relation, _pos_tag = dd_parser_caller(sentence)
        # NOTE(review): the raw sentence becomes part of a file name; characters
        # such as "/" may need sanitising — confirm how the helper handles them.
        graph_name = sentence + '_' + resolve_time
        graph = generate_dep_rel_graph(
            os.path.join(current_dir, 'static/images/cache'), graph_name,
            words, relation, head)
        parsed = {
            'sentence': sentence,
            'deprel': None,
            'relation': str(relation),
            'words': str(words),
            'head': str(head)
        }
        if graph is None:
            print("failed to generate dependency graph")
            result[RESULT_CODE] = -303
            result[RESULT_MSG] = "failed to generate dependency graph"
        else:
            print('successfully generate dependency graph')
            parsed['deprel'] = graph_name + ".png"
            result[RESULT_CODE] = 300
            result[RESULT_MSG] = "success"
        result[RESULT_DATA] = parsed
    except TypeError:
        result[RESULT_CODE] = -302
        result[RESULT_MSG] = "no valid data is given"
    return wrap_result(result)
def save_deprel_result(request):
    """
    Store an annotation made on the page by appending one line to the log file.

    Line format, tab separated: id, submission time, sentence, path (name) of
    the parse image, words, relation, head, problematic flag, comments.
    Fields missing from the request are written as "-".

    :param request: ajax POST request whose body is a JSON object
    :return: wrapped result. Codes: -301 empty body, -103 not an ajax POST,
        -402 body is not usable, 400 saved
    """
    result = ini_result()
    data = request.body
    # The body is bytes, so comparing it with "" never matched an empty body.
    if not data:
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no data is given"
        return wrap_result(result)

    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not is_ajax or request.method != 'POST':
        result[RESULT_CODE] = -103
        result[RESULT_MSG] = "not supported request form (should be post and with ajax)"
        return wrap_result(result)

    try:
        data = json.loads(bytes.decode(data))
        fields = [
            str(uuid.uuid1()),
            datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            data.get('sentence', '-'),
            data.get('image_path', '-'),
            data.get('words', '-'),
            data.get('relation', '-'),
            data.get('head', '-'),
            data.get('is_problematic', '-'),
            data.get('comments', '-'),
        ]
        # JSON values may be booleans, numbers or lists; str.join only accepts str.
        line = '\t'.join(str(field) for field in fields)
        with open(log_dir.joinpath('dep_rel_service_log.log'), 'a+', encoding="utf-8") as f:
            f.write(line + '\n')
        result[RESULT_MSG] = "successfully saved"
        result[RESULT_CODE] = 400
    except (TypeError, ValueError, AttributeError):
        # ValueError: undecodable bytes / malformed JSON;
        # AttributeError: the JSON document is not an object
        result[RESULT_CODE] = -402
        result[RESULT_MSG] = "no valid data is given"
    return wrap_result(result)
def _command_json_response(payload):
    """Serialize ``payload`` into the UTF-8 JSON HttpResponse used by command_resolve."""
    return HttpResponse(json.dumps(payload, ensure_ascii=False), content_type='application/json',
                        charset='utf-8')


def command_resolve(request):
    """
    Resolve a command by template matching (no grounding is done) and publish
    the resulting task over MQTT.

    Expects a GET request with the parameters "command", "sessionId" and
    "robotId".

    :param request: the incoming request
    :return: HttpResponse with a JSON body. Codes: -101 not a GET request,
        -802 missing parameter, -803 command could not be resolved,
        -804 resolved without task information,
        801 reply of the reasoning module is unusable, 802 reasoning failed,
        800 success
    """
    result = ini_result()
    if request.method != 'GET':
        result[RESULT_CODE] = -101
        result["message"] = "request method should be get"
        return _command_json_response(result)

    command = request.GET.get("command", None)
    session_id = request.GET.get("sessionId", '')
    robot_id = request.GET.get("robotId", '')
    if not command or not session_id or not robot_id:
        result[RESULT_CODE] = -802
        result["message"] = "No command found"
        return _command_json_response(result)

    result = resolve(command)
    if not result["success"]:
        result[RESULT_CODE] = -803
        result["message"] = "failed to resolve"
        return _command_json_response(result)

    task = result.get("task", None)
    if task is None:
        result[RESULT_CODE] = -804
        result["message"] = "success without task information"
        return _command_json_response(result)

    task["sessionId"] = session_id
    task["robotId"] = robot_id
    # todo send to reasoning service
    # ret = requests.post(PLANNING_URL, json=task).text
    msg_to_front = json.dumps({'input': command, 'output': task}, ensure_ascii=False)
    mqtt_client.publish(topic, msg_to_front)

    # Stand-in for the reasoning service's reply until the call above is
    # enabled. It has to be valid JSON; the former literal '{RESULT_CODE: 200}'
    # was not, so every request ended in the 801 branch.
    ret = json.dumps({RESULT_CODE: 200})
    try:
        ret_content = json.loads(ret)
        reason_code = ret_content.get(RESULT_CODE, 0)
    except (json.JSONDecodeError, TypeError, AttributeError):
        # "except A or B" only catches A; a tuple is required
        result[RESULT_CODE] = 801
        result["message"] = "success but failed to send to reasoning module"
        return _command_json_response(result)

    if reason_code != 200:
        result[RESULT_CODE] = 802
        result["message"] = "success but reasoning failed"
        return _command_json_response(result)

    result[RESULT_CODE] = 800
    result["message"] = "success"
    print_log(request, result)
    return _command_json_response(result)
def extract_publication_from_bib_info(entry):
    """
    Build a Publication node from one bib entry.

    Keys are normalised with ``capitalize_dict_keys``; string values of the
    fields listed in FIELD_NAMES_PUB are upper-cased, other values are passed
    through unchanged.

    :param entry: dict of bib info
    :return: result dict. Codes: -1001 entry is not a dict,
        -1002 entry type missing or not in PUBLICATION_TYPES,
        -1003 a required field is missing, 1001 success (data = Publication)
    """
    result = ini_result()
    if not isinstance(entry, dict):
        result[RESULT_CODE] = -1001
        result[RESULT_MSG] = "输入数据错误!"
        return result

    entry = capitalize_dict_keys(entry)
    node_type = entry.get("ENTRYTYPE", None)
    if node_type is None or node_type not in PUBLICATION_TYPES:
        result[RESULT_CODE] = -1002
        result[RESULT_MSG] = "unrecognized entry type:" + str(entry)
        return result

    entry_processed = {}
    for field_name in FIELD_NAMES_PUB:
        if field_name in entry:
            value = entry[field_name]
            # only strings can be upper-cased; anything else is kept as it is
            entry_processed[field_name] = value.upper() if isinstance(value, str) else value

    if not check_core_fields(node_type, entry_processed):
        result[RESULT_CODE] = -1003
        result[RESULT_MSG] = "缺少必填字段" + str(entry)
        return result

    # Publication keyword argument -> key in the processed entry
    field_map = (
        ("author", "AUTHOR"), ("editor", "EDITOR"), ("title", "TITLE"),
        ("journal", "JOURNAL"), ("year", "YEAR"), ("volume", "VOLUME"),
        ("number", "NUMBER"), ("series", "SERIES"), ("address", "ADDRESS"),
        ("pages", "PAGES"), ("month", "MONTH"), ("note", "NOTE"),
        ("publisher", "PUBLISHER"), ("edition", "EDITION"),
        ("book_title", "BOOKTITLE"), ("organization", "ORGANIZATION"),
        ("chapter", "CHAPTER"), ("school", "SCHOOL"),
        ("how_published", "HOWPUBLISHED"), ("keywords", "KEYWORDS"),
        ("abstract", "ABSTRACT"), ("note_id", "NOTEID"),
        ("institution", "INSTITUTION"), ("added_by", "ADDEDBY"),
        ("added_date", "ADDEDDATE"), ("sci_index", "SCIINDEX"),
        ("ei_index", "EIINDEX"), ("ssci_index", "SSCIINDEX"),
        ("modified_date", "MODIFIEDDATE"), ("id", "ID"),
    )
    attributes = {keyword: entry_processed.get(key, None) for keyword, key in field_map}
    # The original looked up lower-case "type" only, unlike every other field;
    # the upper-case key is tried first and the old key is kept as a fallback.
    attributes["type"] = entry_processed.get("TYPE", entry_processed.get("type", None))

    node = Publication("", node_type, **attributes)
    result[RESULT_CODE] = 1001
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = node
    return result
def _create_publication_relations(driver, info_result, node_label, rel_type):
    """Create ``rel_type`` edges from publications to ``node_label`` nodes; return the failed entries."""
    failed = []
    if info_result[RESULT_CODE] not in [1009, 1010]:  # extraction did not succeed
        return failed
    for entry in info_result[RESULT_DATA]["success"]:
        node_result = create_or_match_nodes(driver, [entry], node_label)
        if node_result[RESULT_CODE] != 904:
            failed.append(entry)
            continue
        rel_result = query_or_create_relation(driver, "PUBLICATION", entry["pub"].uuid, node_label,
                                              node_result[RESULT_DATA][0].uuid, rel_type)
        if rel_result[RESULT_CODE] not in [1304, 1306]:
            failed.append(entry)
    return failed


def build(driver, file_name, sheet_name=None, column_name=None):
    """
    Extract publication information from a file and write Publication, Venue
    and Person nodes plus their PUBLISHED_IN and AUTHORED_BY edges.

    :param driver: neo4j driver
    :param file_name: path of the bib or Excel file
    :param sheet_name: worksheet name (Excel only)
    :param column_name: column name (Excel only)
    :return: result dict. Codes: -504 file does not exist, -500 no driver,
        -1205 writing Publication/Venue/Person nodes failed,
        1400 all edges created, -1400 at least one edge failed
    """
    result = ini_result()
    if file_name is None or not os.path.exists(file_name):
        result[RESULT_CODE] = -504
        result[RESULT_MSG] = "file does not exist"
        return result
    if driver is None:
        result[RESULT_CODE] = -500
        result[RESULT_MSG] = "the database is not configured!"
        return result

    # Other callers in this file read the extractor's return value as a result
    # envelope (code / msg / data). Iterating the envelope itself would walk its
    # keys, so the payload is unwrapped first; a plain list is still accepted.
    extracted = extract_bib_info_from_file(file_name, sheet_name, column_name)
    if isinstance(extracted, dict) and RESULT_DATA in extracted:
        publication_info = extracted[RESULT_DATA] or []
    else:
        publication_info = extracted or []

    pubs, venues, persons = [], [], []
    fail_pub, fail_venue, fail_person = [], [], []
    for entry in publication_info:
        # publication
        tmp_result_pub = extract_publication_from_bib_info(entry)
        if tmp_result_pub[RESULT_CODE] == 1001:
            pubs.append(tmp_result_pub[RESULT_DATA])
        else:
            fail_pub.append(entry)
        # venue
        tmp_result_venue = extract_venue_from_bib_info(entry)
        if tmp_result_venue[RESULT_CODE] == 1005:
            venues.append(tmp_result_venue[RESULT_DATA])
        else:
            fail_venue.append(entry)
        # person
        tmp_result_person = extract_person_from_bib_info(entry)
        if tmp_result_person[RESULT_CODE] == 1006:
            persons.append(tmp_result_person[RESULT_DATA])
        else:
            fail_person.append(entry)

    # the node writer is handed None instead of an empty list
    pubs = pubs or None
    venues = venues or None
    persons = persons or None

    # write the extracted Publication, Venue and Person nodes
    db_pub_result = create_or_match_nodes(driver, pubs, return_type="class", to_create=True)
    if db_pub_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = -1205
        result[RESULT_MSG] = "Publication节点生成失败"
    db_ven_result = create_or_match_nodes(driver, venues, return_type="class", to_create=True)
    if db_ven_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = -1205
        result[RESULT_MSG] += "。Venue节点生成失败"
    db_ppl_result = create_or_match_nodes(driver, persons, return_type="class", to_create=True)
    if db_ppl_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = -1205
        result[RESULT_MSG] += "。Person节点生成失败"
    if result[RESULT_CODE] == -1205:
        return result

    # PUBLISHED_IN edges
    publish_in_info_result = extract_rel_publish_in_from_pub_info(db_pub_result[RESULT_DATA])
    failed_pair = _create_publication_relations(driver, publish_in_info_result, "VENUE", "PUBLISHED_IN")
    # AUTHORED_BY edges
    author_by_info_result = extract_rel_author_by_from_pub_info(db_pub_result[RESULT_DATA])
    failed_pair += _create_publication_relations(driver, author_by_info_result, "PERSON", "AUTHORED_BY")

    # The former test "failed_pair is not None" was always true for a list, so
    # failures were reported as success.
    if not failed_pair:
        result[RESULT_CODE] = 1400
        result[RESULT_MSG] = "success"
    else:
        result[RESULT_CODE] = -1400
        result[RESULT_MSG] = "partially or fully failed"
    return result