def load_doc_id_hd_classification():
    """Map each document id to its activity ("hd") category name and label id.

    First reads the activity-type labels (``dic_label`` rows with
    ``parent_id=15``) from the ``common_tag`` database, then reads the
    ``doc_id`` / ``hd_category`` pairs from ``resource_document``.

    Returns:
        dict: ``{doc_id: {'name': hd_category, 'id': label_id}}`` for every
        document whose ``hd_category`` is not NULL.

    Raises:
        KeyError: if a document's ``hd_category`` has no label with the
            same name among the ``parent_id=15`` labels.
    """
    from contextlib import closing

    # Activity-type name -> label id.
    with closing(get_connection_mysql(db='common_tag')) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select label_id, parent_id, name from dic_label where parent_id=15")
        hd_cat_id_map = {row[2]: row[0] for row in cur.fetchall()}

    with closing(get_connection_mysql(db=environment_configure.MYSQL_DB_DAOGU)) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select doc_id, hd_category from resource_document")
        rows = cur.fetchall()

    # Built after the connection is released so a KeyError cannot leak it.
    return {
        row[0]: {'name': row[1], 'id': hd_cat_id_map[row[1]]}
        for row in rows
        if row[1] is not None
    }
def load_region_code():
    """Build a lookup from a region's full name to its region records.

    Reads every row of ``dic_region`` in the ``common_tag`` database.

    Returns:
        dict: ``{all_name: [record, ...]}``. Each record is a dict with the
        keys ``regionId``, ``regionName``, ``level``, ``continent``,
        ``continentCode``, ``country``, ``countryCode``, ``province``,
        ``provinceCode``, ``city`` and ``cityCode``; keys whose value is
        NULL are omitted. Rows sharing the same ``all_name`` are collected
        in one list.
    """
    from contextlib import closing

    sql_str = "select region_id, region_name, all_name, level, continent, continent_code, country, country_code, province,  province_code, city, city_code from dic_region"
    with closing(get_connection_mysql(db='common_tag')) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute(sql_str)
        rows = cur.fetchall()

    region_id_map = {}
    for row in rows:
        (region_id, region_name, all_name, level, continent, continent_code,
         country, country_code, province, province_code, city, city_code) = row

        record = {
            'regionId': region_id, 'regionName': region_name, 'level': level,
            'continent': continent, 'continentCode': continent_code,
            'country': country, 'countryCode': country_code,
            'province': province, 'provinceCode': province_code,
            'city': city, 'cityCode': city_code,
        }
        if continent is None:
            # An empty continent column marks a Chinese place name.
            record['continent'] = '亚洲'
            record['continentCode'] = 1
        # Drop columns that are NULL in the database.
        record = {key: value for key, value in record.items() if value is not None}

        # Accumulate under the same key that is written; the previous code
        # looked up ``region_name`` while storing under ``all_name``.
        region_id_map.setdefault(all_name, []).append(record)
    return region_id_map
def load_simple_image_of_article_code():
    """Group appendix files by the document they belong to.

    Reads ``APPDOCID`` / ``APPFILE`` pairs from ``wcmappendix`` in the
    ``trswcm`` database.

    Returns:
        dict: ``{APPDOCID: [APPFILE, ...]}`` with files in the order the
        query returned them.
    """
    from contextlib import closing

    with closing(get_connection_mysql(db='trswcm')) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("SELECT APPDOCID,APPFILE FROM wcmappendix")
        rows = cur.fetchall()

    APPFILE_articleId_map = {}
    for row in rows:
        # Append in place instead of rebuilding the list for every row.
        APPFILE_articleId_map.setdefault(row[0], []).append(row[1])
    return APPFILE_articleId_map
def load_topic_word_code():
    """Build a lookup from a topic-word name to its dictionary entries.

    Reads ``refining_entity_dictionary`` rows with ``entity='szdy'``.

    Returns:
        dict: ``{name: [{'id': id, 'parent_id': parent_id}, ...]}``. Rows
        sharing the same name are collected in one list.
    """
    from contextlib import closing

    with closing(get_connection_mysql(db=environment_configure.MYSQL_DB_DAOGU)) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select id, parent_id, name from refining_entity_dictionary where entity='szdy'")
        rows = cur.fetchall()

    topic_word_id_map = {}
    for row in rows:
        entry = {'id': row[0], 'parent_id': row[1]}
        # Accumulate under the name that is used as the key; the previous
        # code looked up ``parent_id`` while storing under the name, so
        # duplicates of a name replaced each other.
        topic_word_id_map.setdefault(row[2], []).append(entry)
    return topic_word_id_map
def load_person_name_code():
    """Build a lookup from a person name to the ids registered for it.

    Reads ``refining_entity_dictionary`` rows with ``entity='rm'``.

    Returns:
        dict: ``{name: [id, ...]}`` with ids in the order the query
        returned them.
    """
    from contextlib import closing

    with closing(get_connection_mysql(db=environment_configure.MYSQL_DB_DAOGU)) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select id, name from refining_entity_dictionary where entity='rm'")
        rows = cur.fetchall()

    person_name_id_map = {}
    for row in rows:
        # Append in place instead of rebuilding the list for every row.
        person_name_id_map.setdefault(row[1], []).append(row[0])
    return person_name_id_map
def load_xi_30_speeches_code():
    """Build a lookup from label name to label id for ``parent_id=18`` labels.

    Reads ``dic_label`` rows with ``parent_id=18`` from the ``common_tag``
    database.

    Returns:
        dict: ``{name: {'id': label_id, 'parent_id': parent_id}}``. If a
        name occurs more than once, the last row returned wins.
    """
    from contextlib import closing

    with closing(get_connection_mysql(db='common_tag')) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select label_id, parent_id, name from dic_label where parent_id=18")
        rows = cur.fetchall()

    return {row[2]: {'id': row[0], 'parent_id': row[1]} for row in rows}
def load_all_region_names():
    """Return the ``region_name`` of every ``dic_region`` row with ``level<=4``.

    Returns:
        list: region names in the order the query returned them
        (duplicates are kept).
    """
    from contextlib import closing

    with closing(get_connection_mysql(db='common_tag')) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select region_name from dic_region where level<=4")
        rows = cur.fetchall()

    # Each row is a one-column record; keep only the name itself.
    return [row[0] for row in rows]
def load_all_region_short():
    """Return every short name listed for regions of country '中华人民共和国'.

    The ``region_short`` column holds comma-separated short names; rows
    where it is NULL are skipped.

    Returns:
        list: the individual short names, in query order (duplicates are
        kept).
    """
    from contextlib import closing

    with closing(get_connection_mysql(db='common_tag')) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select region_short from dic_region where country='中华人民共和国'")
        rows = cur.fetchall()

    region_name_list = []
    for row in rows:
        region_short = row[0]
        if region_short is None:
            continue
        region_name_list.extend(region_short.split(","))
    return region_name_list
def load_orgnization_names():
    """Load organization names and their abbreviations.

    Reads ``abb_entity_dictionary`` rows with ``category='jg'``. The
    ``abb_entity`` column holds abbreviations separated by ``', '``.

    Returns:
        tuple: ``(name_id_map, shorter_full_name_map)`` where
        ``name_id_map`` is ``{full_entity: id}`` and
        ``shorter_full_name_map`` is ``{abbreviation: [full_entity, ...]}``.
        Rows whose ``abb_entity`` is NULL contribute to ``name_id_map``
        only.
    """
    from contextlib import closing

    # NOTE(review): host, user and password are hard-coded in this call,
    # unlike the other loaders in this file which pass only ``db``.
    # Consider moving them to configuration and rotating the password.
    conn = get_connection_mysql(host='172.18.89.14', user='******', passwd='Founder123', db=environment_configure.MYSQL_DB_DAOGU)
    name_id_map = {}
    shorter_full_name_map = {}
    with closing(conn), closing(conn.cursor()) as cur:
        cur.execute("select id,full_entity,abb_entity from abb_entity_dictionary where category='jg'")
        for row in cur:
            entity_id, full_name, abbreviations = row[0], row[1], row[2]
            name_id_map[full_name] = entity_id
            if abbreviations is None:
                # No abbreviations recorded; previously this raised
                # AttributeError on ``None.split``.
                continue
            for a_name in abbreviations.split(', '):
                shorter_full_name_map.setdefault(a_name, []).append(full_name)
    return name_id_map, shorter_full_name_map
def load_all_entities():
    """Collect the set of all known entity words.

    Combines three sources:
      * the region names returned by ``load_all_region_names()``;
      * every abbreviation in ``abb_entity_dictionary.abb_entity``
        (abbreviations are separated by ``', '``; NULL values are skipped);
      * every ``name`` in ``refining_entity_dictionary`` with
        ``status='ok'``.

    Returns:
        set: the union of the words from all three sources.
    """
    from contextlib import closing

    words = set(load_all_region_names())

    with closing(get_connection_mysql(db=environment_configure.MYSQL_DB_DAOGU)) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute("select abb_entity from abb_entity_dictionary")
        for row in cur.fetchall():
            if row[0] is None:
                # Previously raised AttributeError on ``None.split``.
                continue
            words.update(row[0].split(', '))

        cur.execute("select name from refining_entity_dictionary where status='ok'")
        words.update(row[0] for row in cur.fetchall())
    return words
def load_region_name_data_for_normalization():
    """Load region records and alias mappings used to normalize region names.

    Reads ``dic_region`` rows with ``level<=4`` from the ``common_tag``
    database.

    Returns:
        tuple: ``(region_id_map, region_short_map)``.

        ``region_id_map`` is ``{region_name: [record, ...]}``. Each record
        is a dict with the keys ``regionId``, ``regionName``, ``level``,
        ``continent``, ``continentCode``, ``country`` (taken from the
        ``country_short`` column), ``countryCode``, ``province``,
        ``provinceCode``, ``city`` and ``cityCode``; keys whose value is
        NULL are omitted.

        ``region_short_map`` is ``{alias: [region_name, ...]}`` built from
        the comma-separated ``region_short`` column.
    """
    from contextlib import closing

    sql_str = "select region_id, region_name, region_short, level, continent, continent_code, country_short, country_code, province,  province_code, city, city_code from dic_region where level<=4"
    with closing(get_connection_mysql(db='common_tag')) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute(sql_str)
        rows = cur.fetchall()

    region_id_map = {}
    region_short_map = {}
    for row in rows:
        # Unpack the raw columns.
        (region_id, region_name, region_short, level, continent, continent_code,
         country_short, country_code, province, province_code, city, city_code) = row

        record = {
            'regionId': region_id, 'regionName': region_name, 'level': level,
            'continent': continent, 'continentCode': continent_code,
            'country': country_short, 'countryCode': country_code,
            'province': province, 'provinceCode': province_code,
            'city': city, 'cityCode': city_code,
        }
        if continent is None:
            # An empty continent column marks a Chinese place name.
            record['continent'] = '亚洲'
            record['continentCode'] = 1
        # Drop columns that are NULL in the database.
        record = {key: value for key, value in record.items() if value is not None}

        # Standard name -> detailed region records.
        region_id_map.setdefault(region_name, []).append(record)
        # Alias -> standard names.
        if region_short is not None:
            for alias in region_short.split(','):
                region_short_map.setdefault(alias, []).append(region_name)

    return region_id_map, region_short_map