Пример #1
0
 def load_all_code2sysid():
     """Load the code-to-system-ID tables of all three systems.

     Returns:
         dict: whatever ``load_json_file`` returns for
         ``CONFIG.chemisCode2SysID``, ``CONFIG.breakCode2SysID`` and
         ``CONFIG.microCode2SysID``, stored under the keys '化学', '暴发'
         and '微生物' respectively.
     """
     # A dict display evaluates its values top to bottom, so the three files
     # are read in the listed order.
     return {
         '化学': load_json_file(CONFIG.chemisCode2SysID),
         '暴发': load_json_file(CONFIG.breakCode2SysID),
         '微生物': load_json_file(CONFIG.microCode2SysID),
     }
Пример #2
0
 def load_allSys_nodes(self):
     """Load the node file of each system and convert it with ``getIdCode``.

     Returns:
         dict: the result of ``self.getIdCode`` for the node files
         ``CONFIG.chemisNodes``, ``CONFIG.breakNodes`` and
         ``CONFIG.microNodes``, stored under the keys '化学', '暴发' and
         '微生物' respectively.
     """
     # (result key, name of the CONFIG attribute holding the file location)
     system_sources = (
         ('化学', 'chemisNodes'),
         ('暴发', 'breakNodes'),
         ('微生物', 'microNodes'),
     )
     nodes_by_system = {}
     for system_name, config_attr in system_sources:
         raw_nodes = load_json_file(getattr(CONFIG, config_attr))
         # NOTE(review): `self` is passed explicitly in addition to the bound
         # call; presumably this method is invoked on the class itself --
         # confirm against the callers before changing the call shape.
         nodes_by_system[system_name] = self.getIdCode(self, raw_nodes)
     return nodes_by_system
Пример #3
0
def excel_standard_foods():
    """Export the in-use standard food nodes to an Excel sheet.

    Reads the node list from ``standard_foods_jsonFile`` and the id-to-code
    mapping derived from ``food_code2id_file``, then writes one row per node
    whose ``use_flag`` is set to ``excel_standardFoods_saveFile``.

    Each row holds: running number (starting at 1), node name, node code
    (empty when the id has no code), number of synonyms, followed by the
    synonym keys, one per cell. Shorter rows are padded with '' so every row
    is as wide as the header.

    The first two entries of the node list are skipped.
    NOTE(review): presumably these are root/placeholder nodes -- confirm.

    When no node is in use, a sheet containing only the header is written.
    """
    standard_foods = load_json_file(standard_foods_jsonFile)
    food_id2code = id2code(food_code2id_file)

    # The explicit comparison with True is kept so that the set of selected
    # nodes is exactly the one the previous implementation selected.
    used_nodes = [
        node for node in standard_foods[2:] if node['use_flag'] == True
    ]
    # default=0 avoids a ValueError when there is nothing to export.
    max_num = max((len(node['synonyms']) for node in used_nodes), default=0)

    col_name = ['序号', '一级分类名称', '编码', '同义词数量', '同义词']
    # Unnamed extra columns so the longest synonym list fits; a non-positive
    # repeat count adds nothing.
    col_name.extend([''] * (max_num - 1))
    width = len(col_name)

    rows = []
    for food_id, node in enumerate(used_nodes, start=1):
        synonyms = node['synonyms']
        row = [
            food_id, node['name'],
            food_id2code.get(node['id']), len(synonyms)
        ]
        row.extend(synonyms.keys())
        # Pad to the header width so the row always matches the column
        # count, including the case where no node has any synonym.
        row.extend([''] * (width - len(row)))
        rows.append(row)

    # Building the frame once avoids the quadratic cost of growing it one
    # `.loc` row at a time.
    excelData = pd.DataFrame(rows, columns=col_name)
    excelData.to_excel(excel_standardFoods_saveFile, index=None)
Пример #4
0
 def load_standard_attributes():
     """Load the standard attributes and index them by id.

     Returns:
         dict: ``StandardAttribute`` objects keyed by their ``id``, one per
         entry of the data loaded from ``CONFIG.standard_attributes_file``.
     """
     attributes_by_id = {}
     for raw_attribute in load_json_file(CONFIG.standard_attributes_file):
         # Start from a blank object; from_json() fills it in.
         attribute = StandardAttribute('', '', '')
         attribute.from_json(raw_attribute)
         attributes_by_id[attribute.id] = attribute
     return attributes_by_id
Пример #5
0
 def load_standard_foods():
     """Load the standard food nodes and index them by id.

     Returns:
         dict: ``StandardFoodNode`` objects keyed by their ``id``, one per
         entry of the data loaded from ``CONFIG.standard_foods_file``.
     """
     foods_by_id = {}
     for raw_food in load_json_file(CONFIG.standard_foods_file):
         # Start from a blank node; from_json() fills it in.
         food_node = StandardFoodNode('', '', '')
         food_node.from_json(raw_food)
         foods_by_id[food_node.id] = food_node
     return foods_by_id
Пример #6
0
 def load_general_foods():
     """Load the general food nodes of every field.

     Returns:
         dict: maps each field found in the data loaded from
         ``CONFIG.general_foods_file`` to a dict of ``GeneralFoodNode``
         objects keyed by their ``id``.
     """
     foods_by_field = {}
     raw_by_field = load_json_file(CONFIG.general_foods_file)
     for field, raw_nodes in raw_by_field.items():
         nodes_by_id = foods_by_field[field] = {}
         for raw_food in raw_nodes:
             # Start from a blank node; from_json() fills it in.
             food_node = GeneralFoodNode('', '', '', '')
             food_node.from_json(raw_food)
             nodes_by_id[food_node.id] = food_node
     return foods_by_field
Пример #7
0
def excel_standard_attributes():
    """Export the in-use standard attribute nodes to an Excel sheet.

    Reads the node list from ``standard_attributes_jsonFile`` and the
    id-to-code mapping derived from ``attribute_code2id_file``, then writes
    one row per node whose ``use_flag`` equals True to
    ``excel_standardAttributes_saveFile``.

    Row layout: running number (starting at 1), the node's code, the node's
    name, the second character of the node's code, and the code of the
    node's parent. The first two entries of the node list are skipped.

    Raises:
        KeyError: if a node id or parent id has no entry in the id-to-code
            mapping.
    """
    nodes = load_json_file(standard_attributes_jsonFile)
    code_by_id = id2code(attribute_code2id_file)
    excelData = pd.DataFrame(
        columns=['序号', '编码', '属性描述名称', '大类编码', '上级编码'])

    # Lazy filter: each node is examined right before its row is written.
    in_use = (node for node in nodes[2:] if node['use_flag'] == True)
    for seq_no, node in enumerate(in_use, start=1):
        own_code = code_by_id[node['id']]
        excelData.loc[seq_no - 1] = [
            seq_no,
            own_code,
            node['name'],
            own_code[1],  # second character of the code fills '大类编码'
            code_by_id[node['parent_id']],
        ]
    excelData.to_excel(excel_standardAttributes_saveFile, index=None)
Пример #8
0
def excel_general_foods():
    """Export the in-use general food nodes to one Excel sheet per field.

    For every ``field -> sheet name`` pair in
    ``CONFIG.general_filed_sheetname`` the nodes of that field (from
    ``general_foods_jsonFile``) are written to a sheet of the workbook
    ``excel_generalFoods_saveFile``. The first two entries of each field's
    node list are skipped and only nodes whose ``use_flag`` equals True are
    exported.

    Row layout: digits of the node id, digits of the parent id, node name,
    the field-specific code of the node, the combined category/attribute
    code string, the '_'-joined names on the path to the node (without the
    leading 'root' element), and the raw node id.

    Raises:
        KeyError: if a node on a path, or a standard food referenced by an
            ontology entry, cannot be found by id.
    """
    def index_by_id(node_list):
        """Map id -> node; the first node carrying an id wins."""
        index = {}
        for node in node_list:
            index.setdefault(node['id'], node)
        return index

    def find_path(node_id, nodes_by_id, path_list):
        """Append the names from the 'root' node down to node_id."""
        node = nodes_by_id[node_id]
        if node['name'] != 'root':
            find_path(node['parent_id'], nodes_by_id, path_list)
        path_list.append(node['name'])
        return path_list

    def find_class_attr_code(node, standard_by_id, food_id2code,
                             attribute_id2code):
        """Build the 'categories|attributes' code string of a node.

        Returns None when the node has no ontology entries, the
        comma-joined category codes when no attribute is mapped, and
        otherwise the category codes and attribute codes joined by '|'.
        Duplicates are dropped, first occurrence order is kept.
        """
        if not node['ontology']:
            return None
        field = node['field']
        category, attribute = [], []
        for ontology in node['ontology']:
            category_code = food_id2code.get(ontology)
            if category_code not in category:
                category.append(category_code)
            onto_node = standard_by_id[ontology]
            # Attributes are stored on the standard node, per field and
            # per general-food id.
            attri_list = onto_node['entity'].get(field).get(node['id'])
            for attri in attri_list:
                attri_code = attribute_id2code.get(attri)
                if attri_code not in attribute:
                    attribute.append(attri_code)
        if attribute:
            return ','.join(category) + '|' + ','.join(attribute)
        return ','.join(category)

    general_foods = load_json_file(general_foods_jsonFile)
    standard_foods = load_json_file(standard_foods_jsonFile)
    food_id2code = id2code(food_code2id_file)
    attribute_id2code = id2code(attribute_code2id_file)

    # One dictionary lookup per node replaces a linear scan of the list.
    standard_by_id = index_by_id(standard_foods)
    non_digit = re.compile(r'\D')

    header = ['主键', '父ID', '名称', '食品编码', '分类+属性码', '路径', 'ROWID']
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x.
    with pd.ExcelWriter(excel_generalFoods_saveFile) as writer:
        for field, sheet in CONFIG.general_filed_sheetname.items():
            field_id2code = id2code(
                os.path.join(CONFIG.json_tree_folder,
                             field + 'Code2SystemID.json'))
            field_list = general_foods[field]
            nodes_by_id = index_by_id(field_list)
            rows = []
            for node in field_list[2:]:
                if node['use_flag'] == True:
                    path = find_path(node['id'], nodes_by_id, [])
                    code = find_class_attr_code(node, standard_by_id,
                                                food_id2code,
                                                attribute_id2code)
                    rows.append([
                        non_digit.sub('', node['id']),
                        non_digit.sub('', node['parent_id']), node['name'],
                        field_id2code.get(node['id']), code,
                        '_'.join(path[1:]), node['id']
                    ])
            excelData = pd.DataFrame(rows, columns=header)
            excelData.to_excel(writer, sheet_name=sheet, index=None)
Пример #9
0
def id2code(code2id_filePath):
    """Load a code-to-id mapping and return it inverted (id -> code).

    Args:
        code2id_filePath: location handed to ``load_json_file``; the loaded
            object must be a mapping from code to id.

    Returns:
        dict: each id mapped to its code. When several codes share an id,
        the code that comes last in the loaded mapping wins.
    """
    code_to_id = load_json_file(code2id_filePath)
    return {system_id: code for code, system_id in code_to_id.items()}
Пример #10
0
def generate_general_foods():
    """Rebuild the general food trees of every field from the Excel workbook.

    For each ``field -> sheet`` pair in ``CONFIG.general_filed_sheetname``:

    1. the sheet of ``CONFIG.general_foods_excel`` is turned into a nested
       tree of plain dicts, with the ontology column parsed into food and
       attribute codes;
    2. a root ``GeneralFoodNode`` is registered in
       ``all_data.general_foods[field]`` and every tree node is inserted
       through ``all_data.insert_general_food`` while the tree is walked
       with ``dfs``; ontology mappings are added with
       ``all_data.add_mapping``;
    3. ``<field>Code2SystemID.json`` (Excel code -> new system id) and
       ``<field>.json`` (the tree) are written to
       ``CONFIG.json_tree_folder``.

    Problems found in the Excel data are printed, not raised.

    Raises:
        KeyError: if a parsed code is missing from the code-to-id files, or
            a node's parent has not been inserted before the node itself.
    """
    # Function-scope import so the block does not depend on the file header.
    from collections import Counter, deque

    field_sheetname = CONFIG.general_filed_sheetname
    food_code2id = load_json_file(food_code_to_id_file)
    attribute_code2id = load_json_file(attribute_code_to_id_file)

    def parse_ontology(s, food_codes_set: set, attribute_codes_set: set, sheet,
                       line, name):
        """Extract the known food and attribute codes from one ontology cell.

        Codes are matched with regular expressions because the cell content
        comes in many inconsistent formats; dots inside a code are removed.
        Unknown codes are reported and dropped. ``sheet``, ``line`` and
        ``name`` are only used to tell the user where an error occurred.

        Returns:
            tuple: (list of known food codes, list of known attribute codes)
        """
        if s.strip() == '':
            return [], []
        food_code_pattern = r'F0[A-Z]((([0-9]){2}((\.)?([0-9]){2})*)?)'
        attribute_code_pattern = r'A[A-Z]((([0-9]){2}((\.)?([0-9]){2})*)?)'
        food_codes = [
            x.group().replace('.', '')
            for x in re.finditer(food_code_pattern, s)
        ]
        attribute_codes = [
            x.group().replace('.', '')
            for x in re.finditer(attribute_code_pattern, s)
        ]
        err_flag = False
        food_code_res = []
        attribute_code_res = []
        for code in food_codes:
            if code not in food_codes_set:
                print('错误! 映射的食品编码不存在: %s' % code)
                err_flag = True
            else:
                food_code_res.append(code)
        for code in attribute_codes:
            if code not in attribute_codes_set:
                print('错误! 映射的属性编码不存在: %s' % code)
                err_flag = True
            else:
                attribute_code_res.append(code)
        # A cell without any code is only accepted when it contains Chinese
        # characters, i.e. when it is a textual remark rather than a code.
        # A cell holding attribute codes but no food code is not reported.
        if (not food_codes and not attribute_codes
                and re.search(r'[\u4E00-\u9FA5]+', s) is None):
            err_flag = True
            print('错误! 映射的编码格式错误: %s' % s)
        if err_flag:
            err_position = 'Sheet: %s, Line: %d, Name: %s' % (sheet, line,
                                                              name)
            print('上述错误出现于:%s\n' % err_position)
        return food_code_res, attribute_code_res

    def make_tree_with_ontology(file_path,
                                sheet,
                                food_codes: set,
                                attribute_codes: set,
                                ID='主键',
                                NAME='名称',
                                PARENT_ID='父ID',
                                CODE='食品编码',
                                PATH='路径',
                                ONTOLOGY='分类+属性码'):
        """Read one sheet and link its rows into a tree of dicts.

        The tree starts at an artificial root with id 0; rows are attached
        breadth-first below the node whose id equals their parent id. Rows
        that never get attached are reported on stdout.

        Returns:
            dict: the root node, children nested under 'children'.
        """
        df = read_excel(file_path, sheet=sheet, skip_row=1)
        ids = list(df[ID])
        names = list(df[NAME])
        parents = list(df[PARENT_ID])
        codes = list(df[CODE])
        paths = list(df[PATH])
        ontologys = list(df[ONTOLOGY])
        root = {
            'id': 0,
            'parent_id': -1,
            'name': 'root',
            'code': '',
            'ontology': [],
            'attribute': [],
            'children': []
        }

        # Group row indices by parent id once (row order preserved) instead
        # of rescanning every row for every node of the tree.
        rows_by_parent = {}
        for i, parent in enumerate(parents):
            if parent != parent:
                # NaN never equals any id, so such a row has no parent.
                continue
            rows_by_parent.setdefault(parent, []).append(i)

        queue = deque([root])
        added_id = set()
        while queue:
            node = queue.popleft()
            added_id.add(node['id'])
            for i in rows_by_parent.get(node['id'], ()):
                # i + 2 is passed as the line number used in error reports.
                # NOTE(review): presumably this accounts for the header
                # rows of the sheet -- confirm against read_excel.
                ontology, attribute = parse_ontology(
                    ontologys[i], food_codes, attribute_codes, sheet,
                    i + 2, names[i])
                child = {
                    'id': ids[i],
                    'name': names[i],
                    'children': [],
                    'parent_id': parents[i],
                    'code': codes[i],
                    'path': paths[i],
                    'ontology': ontology,
                    'attribute': attribute
                }
                node['children'].append(child)
                queue.append(child)
        not_added_id = [x for x in ids if x not in added_id]
        if len(not_added_id) > 0:
            print('错误! 在表 %s 中, 这些序号的结点无法被加入分类树中, 请检查它们和它们的父节点是否正确: %s\n\n\n' %
                  (sheet, '|'.join([str(x) for x in not_added_id])))
        return root

    def get_ontology_codes():
        """Return the sets of standard food codes and attribute codes."""
        def get_codes(file, title='编码'):
            """Read the code column of `file`, dots removed, as a set.

            Empty codes are dropped; codes occurring more than once are
            reported on stdout.
            """
            df = read_excel(file)
            codes = [''.join(x.strip().split('.')) for x in df[title]]
            codes_set = set(codes)
            codes_set.discard('')
            # Count once instead of calling list.count() for every code.
            occurrences = Counter(codes)
            for code in codes_set:
                if occurrences[code] > 1:
                    print('error in file %s! %s编码不唯一!' % (file, code))
            return codes_set

        return get_codes(CONFIG.standard_foods_excel), get_codes(
            CONFIG.standard_attributes_excel)

    food_code_set, attribute_code_set = get_ontology_codes()
    for field, sheet in field_sheetname.items():
        general_tree = make_tree_with_ontology(CONFIG.general_foods_excel,
                                               sheet, food_code_set,
                                               attribute_code_set)
        # The root node is handled separately.
        root_id = CONFIG.generate_new_id(field)
        all_data.general_foods[field] = dict()
        all_data.general_foods[field][root_id] = GeneralFoodNode(
            root_id, 'root', '', field)
        id_dict = {-1: root_id}  # maps ids used in Excel to real system ids
        code2id = {}

        # All remaining nodes are inserted while walking the tree with dfs.
        def func(node):
            parent_id = id_dict[node['parent_id']]
            new_id = all_data.insert_general_food(field, parent_id,
                                                  node['name'])
            # Record the new id, then add the ontology/attribute mappings.
            id_dict[node['id']] = new_id
            code2id[node['code']] = new_id
            for standard_code in node['ontology']:
                standard_food_id = food_code2id[standard_code]
                attribute_ids = [
                    attribute_code2id[code] for code in node['attribute']
                ]
                all_data.add_mapping(field, new_id, standard_food_id,
                                     attribute_ids)

        dfs(general_tree, func)
        save_json_file(
            os.path.join(CONFIG.json_tree_folder,
                         '%sCode2SystemID.json' % field), code2id)
        save_json_file(
            os.path.join(CONFIG.json_tree_folder, '%s.json' % field),
            general_tree)
Пример #11
0
 def load_reflections():
     """Return the data loaded from ``CONFIG.between_general_systems``."""
     reflections_file = CONFIG.between_general_systems
     return load_json_file(reflections_file)