Пример #1
0
def get_shop_info(uid):
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT name, name_en FROM chat_shopping WHERE id=%s;''', (uid,))
    _res_1 = cursor.fetchone()
    cursor.close()
    conn.close()

    conn = base_data_pool.connection()
    cursor = conn.cursor()
    _res = cursor.execute('''SELECT
  name,
  name_en
FROM chat_shopping
WHERE tag_id = 9 AND id=%s;''', (uid,))
    if _res:
        _res_2 = cursor.fetchone()
    else:
        _res_2 = ('', '')
    cursor.close()
    conn.close()
    _res = list()
    _res.extend(_res_1)
    _res.extend(_res_2)
    return _res
Пример #2
0
def get_city_map():
    conn = base_data_pool.connection()
    cursor = conn.cursor()

    sql = "select id,map_info from city;"
    cursor.execute(sql)
    datas = cursor.fetchall()

    cid2map = {}

    for data in datas:
        if None in data:
            continue

        cid = data[0]
        map_info = data[1]

        try:
            map_info_list = map_info.strip().split(',')

            lat = float(map_info_list[0])
            lgt = float(map_info_list[1])

        except Exception as e:
            continue

        cid2map[cid] = map_info

    cursor.close()
    conn.close()
    logger.debug('[cid size: {}]'.format(len(cid2map)))
    return cid2map
Пример #3
0
def get_tagid_dict(_poi_type):
    _dict = {}
    if _poi_type == 'attr':
        sql = '''SELECT
  tag,
  tag_en,
  original_tag
FROM chat_attraction_tagS
ORDER BY id;'''
    elif _poi_type == 'rest':
        sql = 'select tag,tag_en,original_tag from chat_restaurant_tagS'
    elif _poi_type == 'shop':
        sql = '''SELECT
  tag,
  tag_en,
  original_tag
FROM chat_shopping_tagS
ORDER BY id;'''
    else:
        raise TypeError("Unknown Type: {}".format(_poi_type))

    conn = base_data_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute(sql)
    for line in cursor.fetchall():
        tag = line['tag']
        tag_en = line['tag_en']
        original_tag = line['original_tag']
        _tags_set = set()
        for each_tag in original_tag.split('|'):
            if is_legal(each_tag):
                _tags_set.add(each_tag)
        _dict[tuple(_tags_set)] = (tag, tag_en)
    return _dict
Пример #4
0
def insert_city_data():
    sql = '''SELECT
  id,
  name,
  name_en,
  map_info
FROM city;'''
    conn = base_data_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute(sql)
    cursor.close()
    conn.close()

    _count = 0
    _total = 0
    for each in cursor.fetchall():
        _total += 1
        try:
            map_info = each['map_info']
            lng, lat = map_info.split(',')
            each["loc"] = {
                "type": "Point",
                "coordinates": [float(lng), float(lat)]
            }
        except Exception as e:
            logger.exception(msg="[map info error]", exc_info=e)
            continue
        city_collections.update({"id": each["id"]}, each, upsert=True)
        _count += 1
    logger.debug("[insert_data][table: city][total: {}][count: {}]".format(
        _total, _count))
Пример #5
0
def get_c_info():
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT
  id,
  map_info
FROM city
WHERE map_info IS NOT NULL AND map_info != 'NULL';''')
    res = {line[0]: line[1] for line in cursor.fetchall()}
    cursor.close()
    conn.close()
    return res
def prepare_city_info():
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT
  id,
  CASE WHEN grade != -1
    THEN grade
  ELSE 100 END AS grade
FROM city;''')
    _res = {line[0]: line[1] for line in cursor.fetchall()}
    cursor.close()
    conn.close()
    return _res
Пример #7
0
def tag2id():
    conn = base_data_pool.connection()
    sql = '''SELECT
  id,
  tag
FROM {};'''.format(tag_s)
    cursor = conn.cursor()
    cursor.execute(sql)
    _dict = {}
    for _id, _tag_name in cursor.fetchall():
        _dict[_tag_name] = str(_id)
    cursor.close()
    conn.close()
    return _dict
Пример #8
0
def init_id2tag():
    conn = base_data_pool.connection()
    sql = '''SELECT
  id,
  Stag
FROM {};'''.format(tag_b)
    cursor = conn.cursor()
    cursor.execute(sql)
    _dict = defaultdict(set)
    for _id, _l_s_tag in cursor.fetchall():
        for each in _l_s_tag.split('|'):
            if is_legal(each):
                _dict[_id].add(each)
    cursor.close()
    conn.close()
    return _dict
Пример #9
0
def poi_ori(poi_type):
    already_merged_city = init_already_merged_city(poi_type=poi_type)
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT id
FROM city;''')
    cids = list(map(lambda x: x[0], cursor.fetchall()))
    cursor.close()
    conn.close()
    for cid in cids:
        if cid in already_merged_city:
            continue
        start = time.time()
        logger.info('[start][cid: {}]'.format(cid))
        pool.apply_async(poi_merge, args=(cid, poi_type))
        logger.info('[end][cid: {}][takes: {}]'.format(cid, time.time() - start))
    pool.join()
Пример #10
0
def prepare_poi_info():
    _dict = {}
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    _sql = '''SELECT
  id,
  city_id
FROM chat_attraction;'''
    cursor.execute(_sql)
    _count = 0
    for line in cursor.fetchall():
        _count += 1
        if _count % 10000 == 0:
            logger.debug("[prepare_pic_task: {}]".format(_count))
        _dict[line[0]] = line[1]
    cursor.close()
    conn.close()
    logger.debug("[prepare_pic_task: {}]".format(_count))
    return _dict
Пример #11
0
def init_city_dict():
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT *
FROM (SELECT
        id,
        city.name      AS name,
        city.name_en   AS name_en,
        country.name   AS country,
        CASE WHEN grade = -1
          THEN 100
        ELSE grade END AS grade
      FROM city
        JOIN country ON city.country_id = country.mid
      ORDER BY grade) t
ORDER BY t.grade;''')
    for line in cursor.fetchall():
        # id, name, name_en, country, grade
        yield line
    cursor.close()
    conn.close()
Пример #12
0
def city_task():
    # private city 30km
    conn = private_data_test_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute('''SELECT
      id,
      name,
      name_en,
      map_info
    FROM city;''')
    for line in cursor.fetchall():
        if is_map_info_legal(line['map_info']):
            line['search_kilometer'] = 30
            line['is_private_city'] = True
            yield line
            line['search_kilometer'] = 50
            line['is_private_city'] = True
            yield line
    cursor.close()
    conn.close()

    conn = base_data_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute('''SELECT
      id,
      name,
      name_en,
      map_info
    FROM city;''')
    for line in cursor.fetchall():
        if is_map_info_legal(line['map_info']):
            line['search_kilometer'] = 30
            line['is_private_city'] = False
            yield line
            line['search_kilometer'] = 50
            line['is_private_city'] = False
            yield line
    cursor.close()
    conn.close()
Пример #13
0
def filter_data_already_online(_type, _mioji_id, error):
    if _type == 'attr':
        table_name = 'chat_attraction'
    elif _type == 'rest':
        table_name = 'chat_restaurant'
    elif _type == 'shop':
        table_name = 'chat_shopping'
    else:
        raise TypeError("Unknown Type: {}".format(_type))

    _status = None
    try:
        conn = base_data_pool.connection()
        cursor = conn.cursor()
        sql = '''SELECT status_test FROM {} WHERE id='{}';'''.format(table_name, _mioji_id)
        _res = cursor.execute(sql)
        if not _res:
            return False
        _status = cursor.fetchone()[0]
        logger.debug("[type: {}][status: {}]".format(_type, _status))
        cursor.close()
        conn.close()
    except Exception as exc:
        logger.exception(msg="[get online poi status error]", exc_info=exc)

    if not _status:
        return False

    try:
        conn = poi_ori_pool.connection()
        cursor = conn.cursor()
        sql = '''INSERT IGNORE INTO filter_data_already_online (type, mioji_id, error, status) VALUES (%s, %s, %s, %s);'''
        cursor.execute(sql, (_type, _mioji_id, error, _status))
        conn.commit()
        cursor.close()
        conn.close()
    except Exception as exc:
        logger.exception(msg="[insert filter data already online error]", exc_info=exc)
Пример #14
0
def init_task(poi_type, process_num):
    # init city_list
    city_list = [[] for i in range(process_num)]
    already_merged_city = init_already_merged_city(poi_type="{}_data".format(poi_type))
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT id
FROM city;''')
    cids = list(map(lambda x: x[0], cursor.fetchall()))
    cursor.close()
    conn.close()
    _count = 0
    for cid in cids:
        if str(cid) in already_merged_city:
            continue
        _count += 1
        start = time.time()
        logger.info('[start][cid: {}]'.format(cid))
        city_list[_count % process_num].append(cid)
        logger.info('[end][cid: {}][takes: {}]'.format(cid, time.time() - start))

    for each_cids in city_list:
        os.system("nohup python3 multi_city_insert_db.py {} {} &".format(poi_type, ' '.join(each_cids)))
Пример #15
0
def get_max_id():
    global poi_type
    global online_table_name
    global data_source_table
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT max(id) FROM {};'''.format(online_table_name))
    _id_online = cursor.fetchone()[0]
    conn.close()

    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT max(id) FROM {}_unid;'''.format(poi_type))
    _id_merged = cursor.fetchone()[0]
    conn.close()
    if _id_merged and _id_online:
        return max(_id_online, _id_merged)
    elif _id_merged:
        return _id_merged
    elif _id_online:
        return _id_online
    else:
        raise Exception("无法获取最大 id ,不能进行融合")
Пример #16
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/11/23 上午11:29
# @Author  : Hou Rong
# @Site    :
# @File    : poi_city_mapping.py
# @Software: PyCharm
import re
import pandas
import dataset
from service_platform_conn_pool import base_data_pool, source_info_str, source_info_pool, fetchall
from my_logger import get_logger

logger = get_logger("city_mapping")

conn = base_data_pool.connection()
cursor = conn.cursor()
cursor.execute('''SELECT
  city.id      AS city_id,
  country.mid  AS country_id,
  city.name    AS city_name,
  country.name AS country_name
FROM city
  JOIN country ON city.country_id = country.mid;''')
city_info = {line[0]: line for line in cursor.fetchall()}
conn.close()


def hotels_get_geo_id_by_dest_id(dest_id):
    sql = '''SELECT sid
FROM ota_location
Пример #17
0
def insert_poi_unid(merged_dict, cid_or_geohash):
    global white_list
    global online_table_name
    global data_source_table
    start = time.time()
    # get city country name map_info
    _dev_conn = base_data_pool.connection()
    _dev_cursor = _dev_conn.cursor()
    _dev_cursor.execute('''SELECT
  city.id       AS cid,
  city.name     AS city_name,
  country.name  AS country_name,
  city.map_info AS map_info
FROM city
  JOIN country ON city.country_id = country.mid
WHERE city.id = {};'''.format(cid_or_geohash))
    cid, city_name, country, city_map_info = _dev_cursor.fetchone()
    _dev_cursor.close()
    _dev_conn.close()

    # 去除 total 的写法,费劲但是提升速度不明显,使用 total 后 5.9 秒获取巴黎全部信息,直接获取 6.9 秒,相差可以接受
    # init id list
    # online_ids = set()
    # data_ids = set()
    # for _, s_sid_set in merged_dict.items():
    #     for source, sid in s_sid_set:
    #         if source == 'online':
    #             online_ids.add(sid)
    #         else:
    #             data_ids.add((source, sid))

    # get data total
    # get online data name name_en map_info grade star ranking address url
    total_data = {}
    _dev_conn = base_data_pool.connection()
    _dev_cursor = _dev_conn.cursor()
    try:
        _t = time.time()
        sql = '''SELECT
  id,
  name,
  name_en,
  map_info,
  grade,
  -1,
  ranking,
  address,
  ''
FROM {} WHERE city_id='{}';'''.format(online_table_name, cid_or_geohash)
        _dev_cursor.execute(sql)
        logger.debug('[query][sql: {}][takes: {}]'.format(
            sql,
            time.time() - _t))
    except Exception as exc:
        logger.exception("[sql exc][sql: {}]".format(sql), exc_info=exc)

    for line in _dev_cursor.fetchall():
        total_data[('online', line[0])] = line[1:]
    _dev_cursor.close()
    _dev_conn.close()

    # get poi name name_en map_info grade star ranking address url
    _data_conn = poi_ori_pool.connection()
    _data_cursor = _data_conn.cursor()
    try:
        _t = time.time()
        sql = '''SELECT
source,
id,
name,
name_en,
map_info,
grade,
star,
ranking,
address,
url
FROM {}
WHERE city_id='{}';'''.format(data_source_table, cid_or_geohash)
        _data_cursor.execute(sql)
        logger.debug('[query][sql: {}][takes: {}]'.format(
            sql,
            time.time() - _t))
    except Exception as exc:
        logger.exception("[sql exc][sql: {}]".format(sql), exc_info=exc)
    for line in _data_cursor.fetchall():
        total_data[(line[0], line[1])] = line[2:]
    _data_cursor.close()
    _data_conn.close()

    # init white list total data
    if white_list:
        _s_sid = []
        for _each in white_list:
            _s_sid.extend(_each)

        _ori_conn = poi_ori_pool.connection()
        _ori_cursor = _ori_conn.cursor()
        try:
            _t = time.time()
            query_sql = '''SELECT
  source,
  id,
  name,
  name_en,
  map_info,
  grade,
  star,
  ranking,
  address,
  url
FROM {}
WHERE (source, id) IN ({});'''.format(
                data_source_table,
                ','.join(map(lambda x: "('{}', '{}')".format(*x), _s_sid)))
            _ori_cursor.execute(query_sql)
            logger.debug('[query][sql: {}][takes: {}]'.format(
                sql,
                time.time() - _t))
        except Exception as exc:
            logger.exception("[sql exc][sql: {}]".format(sql), exc_info=exc)
        for line in _ori_cursor.fetchall():
            total_data[(line[0], line[1])] = line[2:]
        _ori_cursor.close()
        _ori_conn.close()

    data = []
    for uid, s_sid_set in merged_dict.items():
        for source, sid in s_sid_set:
            # name name_en map_info grade star ranking address url
            name, name_en, map_info, grade, star, ranking, address, url = total_data[
                (source, sid)]
            if not is_legal(name):
                name = ''
            if not is_legal(name_en):
                name_en = ''
            if not is_legal(grade):
                grade = -1.0
            if not is_legal(star):
                star = -1.0
            if not is_legal(ranking):
                ranking = -1.0
            if not is_legal(address):
                address = ''
            if not is_legal(url):
                url = ''

            data.append(
                (uid, cid, city_name, country, city_map_info, source, sid,
                 name, name_en, map_info, grade, star, ranking, address, url))

            #     data = []
            #     _dev_conn = base_data_pool.connection()
            #     _dev_cursor = _dev_conn.cursor()
            #     _data_conn = poi_ori_pool.connection()
            #     _data_cursor = _data_conn.cursor()
            #     for uid, s_sid_set in merged_dict.items():
            #         for source, sid in s_sid_set:
            #             if source == 'online':
            #                 _dev_cursor.execute('''SELECT
            #   name,
            #   name_en,
            #   map_info,
            #   grade,
            #   -1,
            #   ranking,
            #   address,
            #   ''
            # FROM chat_attraction
            # WHERE id = '{}';'''.format(sid))
            #                 try:
            #                     name, name_en, map_info, grade, star, ranking, address, url = _dev_cursor.fetchone()
            #                 except Exception as exc:
            #                     logger.exception("[error sql query][source: {}][sid: {}]".format(source, sid), exc_info=exc)
            #                     continue
            #             else:
            #                 _data_cursor.execute('''SELECT
            #   CASE WHEN name NOT IN ('NULL', '', NULL)
            #     THEN name
            #   ELSE '' END,
            #   CASE WHEN name_en NOT IN ('NULL', '', NULL)
            #     THEN name_en
            #   ELSE '' END,
            #   map_info,
            #   CASE WHEN grade NOT IN ('NULL', '', NULL)
            #     THEN grade
            #   ELSE -1.0 END AS grade,
            #   CASE WHEN star NOT IN ('NULL', '', NULL)
            #     THEN star
            #   ELSE -1.0 END AS star,
            #   CASE WHEN ranking NOT IN ('NULL', '', NULL)
            #     THEN ranking
            #   ELSE -1.0 END AS ranking,
            #   CASE WHEN address NOT IN ('NULL', '', NULL)
            #     THEN address
            #   ELSE '' END,
            #   CASE WHEN url NOT IN ('null', '', NULL)
            #     THEN url
            #   ELSE '' END
            # FROM attr
            # WHERE source = '{}' AND id = '{}';'''.format(source, sid))
            #                 try:
            #                     name, name_en, map_info, grade, star, ranking, address, url = _data_cursor.fetchone()
            #                 except Exception as exc:
            #                     logger.exception("[error sql query][source: {}][sid: {}]".format(source, sid), exc_info=exc)
            #                     continue
            #
            #             data.append((uid, cid, city_name, country, city_map_info, source, sid, name, name_en, map_info, grade,
            #                          star, ranking, address, url))
            #     _dev_cursor.close()
            #     _data_cursor.close()
            #     _dev_conn.close()
            #     _data_conn.close()

    _final_conn = poi_ori_pool.connection()
    _final_cursor = _final_conn.cursor()
    # for d in data:
    try:
        _t = time.time()
        sql = '''REPLACE INTO {}_unid (id, city_id, city_name, country_name, city_map_info, source, source_id, name, name_en, map_info, grade, star, ranking, address, url)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'''.format(
            poi_type)
        _final_cursor.executemany(sql, data)
        logger.debug('[query][sql: {}][takes: {}]'.format(
            sql,
            time.time() - _t))
    except Exception as exc:
        logger.exception("[insert unid table error]", exc_info=exc)
    _final_conn.commit()
    _final_cursor.close()
    _final_conn.close()

    logger.info(
        "[finish prepare data][city: {}][line_count: {}][takes: {}]".format(
            cid_or_geohash, len(data),
            time.time() - start))
Пример #18
0
def get_poi_dict(city_id):
    """
    get all attraction info in the city
    :param city_id: city mioji id
    :return: city info dict
    """
    global white_list_data
    _poi_dict = defaultdict(dict)
    _online_official_data = {}
    _online_nonofficial_data = {}
    # get whole data process
    conn = data_process_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    sql = '''SELECT *
FROM {}
WHERE city_id='{}';'''.format(data_process_table_name, city_id)
    _t = time.time()
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql, time.time() - _t))
    for each in cursor.fetchall():
        if int(each["official"]) == 0:
            _online_nonofficial_data[each['id']] = each
        else:
            _online_official_data[each['id']] = each
    cursor.close()
    conn.close()

    # get whole base data, and update data process result
    conn = base_data_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    sql = '''SELECT *
    FROM {}
    WHERE city_id='{}';'''.format(data_process_table_name, city_id)
    _t = time.time()
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql, time.time() - _t))
    for each in cursor.fetchall():
        each.pop('tag_id')
        if int(each["official"]) == 0:
            _online_nonofficial_data[each['id']].update(each)
        else:
            _online_official_data[each['id']].update(each)
    cursor.close()
    conn.close()

    # get whole source data
    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    sql = "select * from {} where city_id='{}'".format(poi_type, city_id)
    _t = time.time()
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql, time.time() - _t))
    for line in cursor.fetchall():
        source = line['source']
        source_id = line['id']
        _poi_dict[(source, source_id)] = line
    cursor.close()
    conn.close()

    for k, v in white_list_data.items():
        _poi_dict[k] = v
    return _poi_dict, _online_official_data, _online_nonofficial_data