Exemplo n.º 1
0
def generate_cross_dict():
    """Build a lookup of qyer "been to" / "plan to" counters keyed by row id.

    Reads ``id``, ``beentocounts`` and ``plantocounts`` from
    ``detail_total_qyer_20171209a``.

    Returns:
        dict: ``{id: (beentocounts, plantocounts)}``. Each element is the JSON
        string ``{"qyer": <count>}`` when the column holds a usable count, or
        the literal string ``'{}'`` when the column is falsy, 0 or -1.
    """

    def _count_to_json(raw):
        # Falsy column values map to the -1 sentinel; -1 and 0 both mean
        # "no count available" and are stored as an empty JSON object.
        count = int(raw) if raw else -1
        if count in (-1, 0):
            return '{}'
        return json.dumps({'qyer': count})

    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''SELECT
  id,
  beentocounts,
  plantocounts
FROM detail_total_qyer_20171209a;''')
            cross_dict = {
                row[0]: (_count_to_json(row[1]), _count_to_json(row[2]))
                for row in cursor.fetchall()
            }
        finally:
            cursor.close()
    finally:
        # Always hand the connection back, even when the query fails.
        conn.close()
    print("[cross dict finished]")
    return cross_dict
Exemplo n.º 2
0
def update_seek_table(table_name, update_time):
    """Store the latest processed position for ``table_name``.

    Runs ``REPLACE INTO data_insert_seek`` with ``(table_name, update_time)``
    and commits.

    Args:
        table_name: Value bound to the first column of ``data_insert_seek``.
        update_time: Value bound to the second column of ``data_insert_seek``.
    """
    local_conn = service_platform_pool.connection()
    try:
        local_cursor = local_conn.cursor()
        try:
            local_cursor.execute(
                '''REPLACE INTO data_insert_seek VALUES (%s, %s);''',
                (table_name, update_time))
            logger.debug(
                "[update seek table][table_name: {}][update_time: {}]".format(
                    table_name, update_time))
            local_conn.commit()
        finally:
            local_cursor.close()
    finally:
        # Release the connection even when execute/commit raises.
        local_conn.close()
Exemplo n.º 3
0
def insert_db(table_name, data):
    """Bulk ``REPLACE`` hotel task rows into ``table_name``.

    Args:
        table_name: Target table. It is interpolated into the SQL text
            (identifiers cannot be bound as parameters), so it must come from
            a trusted source.
        data: Sequence of ``(source, source_id, hotel_url)`` tuples, one per
            row, matching the three ``%s`` placeholders.
    """
    # NOTE(review): city_id / country_id receive the quoted string 'NULL',
    # not SQL NULL -- confirm this is intended before changing it.
    sql = '''REPLACE INTO {} (source, source_id, city_id, country_id, hotel_url) VALUES (%s, %s, 'NULL', 'NULL', %s)'''.format(
        table_name)
    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            res = cursor.executemany(sql, data)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
    logger.info("[add task][table: {}][count: {}][insert: {}]".format(table_name, len(data), res))
Exemplo n.º 4
0
def insert_db(data):
    """Bulk ``INSERT IGNORE`` rows into ``NewStation.station_src``.

    Args:
        data: Sequence of 10-tuples in column order: ``station``, ``src_city``,
            ``src_country``, ``map_info``, ``station_city_map_info``,
            ``status``, ``belong_city_id``, ``station_code_from_europeRail``,
            ``src_city_code``, ``src_station_code``.
    """
    sql = '''INSERT IGNORE INTO NewStation.station_src (station, src_city, src_country, map_info, station_city_map_info, status, belong_city_id, station_code_from_europeRail, src_city_code, src_station_code)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            _res = cursor.executemany(sql, data)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
    logger.info('[total: {}][insert: {}]'.format(len(data), _res))
Exemplo n.º 5
0
def update_db(data):
    """Bulk-update ``city_id`` in ``view_data.attr_qyer_1216``.

    Args:
        data: Sequence of ``(city_id, source_city_id)`` tuples, matching the
            two ``%s`` placeholders in order.
    """
    # NOTE(review): ``line`` and ``_count`` are not defined in this function;
    # presumably they are module-level names set by the caller -- confirm,
    # otherwise the progress prints raise NameError.
    __conn = service_platform_pool.connection()
    try:
        __cursor = __conn.cursor()
        try:
            print('start', line)
            __res = __cursor.executemany(
                '''UPDATE view_data.attr_qyer_1216
    SET city_id = %s
    WHERE source_city_id = %s;''', data)
            print('end', line, len(data), __res, _count)
            __conn.commit()
        finally:
            # The cursor was previously never closed.
            __cursor.close()
    finally:
        __conn.close()
Exemplo n.º 6
0
def update_test():
    """Write a sample JSON document into ``info`` for one fixed image row.

    Updates ``image_wanle_huantaoyou_20171023a`` where ``file_name`` is
    ``23498847e5190ef6849a5bfcf0e506d2.png`` and commits.
    """
    import json

    query_sql = '''UPDATE image_wanle_huantaoyou_20171023a
SET info = %s
WHERE file_name = %s;'''
    # A raw dict cannot be bound as a SQL parameter; send it as JSON text.
    info = json.dumps({"a": "1"})
    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query_sql,
                           (info, '23498847e5190ef6849a5bfcf0e506d2.png'))
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
Exemplo n.º 7
0
def test():
    """Print the ``p_hash`` JSON field of every row in the image table.

    Selects ``file_name``, ``source``, ``sid`` and ``info->'$.p_hash'`` from
    ``image_wanle_huantaoyou_20171023a`` and prints the last column of each
    row.
    """
    query_sql = '''SELECT
  file_name,
  source,
  sid,
  info->'$.p_hash' as p_hash
FROM image_wanle_huantaoyou_20171023a;'''
    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query_sql)
            for row in cursor.fetchall():
                print(str(row[-1]))
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()
Exemplo n.º 8
0
def get_seek(table_name):
    """Return the stored seek position for ``table_name``.

    Args:
        table_name: Value matched against ``data_insert_id_seek.task_name``.

    Returns:
        The ``seek`` column of the matching row, or ``0`` when there is no
        row for this task.
    """
    # The task name is a value, so bind it instead of formatting it into the
    # SQL text.
    sql = '''SELECT seek
    FROM data_insert_id_seek WHERE task_name=%s;'''
    local_conn = service_platform_pool.connection()
    try:
        local_cursor = local_conn.cursor()
        try:
            local_cursor.execute(sql, (table_name,))
            _res = local_cursor.fetchone()
        finally:
            local_cursor.close()
    finally:
        # Release the connection even when the query fails.
        local_conn.close()

    return _res[0] if _res is not None else 0
Exemplo n.º 9
0
def update_sql(data):
    """Reset ``status`` to 0 for the given ``pic_detect_task`` ids.

    Args:
        data: Iterable of ids to reset. An empty input is a no-op.
    """
    # Bind the ids as strings so they reach the server quoted, exactly as the
    # previous hand-built "'{}'" list did, but escaped by the driver.
    ids = [str(item) for item in data]
    if not ids:
        # "WHERE id in ()" is not valid SQL; there is nothing to update.
        logger.info("[total: {}][execute: {}]".format(0, 0))
        return

    sql = '''UPDATE pic_detect_task
SET status = 0
WHERE id in ({});'''.format(','.join(['%s'] * len(ids)))
    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            _res = cursor.execute(sql, ids)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
    logger.info("[total: {}][execute: {}]".format(len(ids), _res))
Exemplo n.º 10
0
def get_task_info():
    """Run ``get_per_table_task_info`` for every ``list_total_qyer*`` table.

    Table names are read from ``information_schema.TABLES`` for the
    ``ServicePlatform`` schema and processed in ascending order of the
    segment after the last underscore.
    """
    local_conn = service_platform_pool.connection()
    try:
        local_cursor = local_conn.cursor()
        try:
            local_cursor.execute('''SELECT TABLE_NAME
        FROM information_schema.TABLES
        WHERE TABLE_SCHEMA = 'ServicePlatform';''')
            table_names = [row[0] for row in local_cursor.fetchall()]
        finally:
            local_cursor.close()
    finally:
        # Release the connection before the per-table work starts.
        local_conn.close()

    # Tables must be handled in the order of their tag (last name segment).
    list_tables = sorted(
        (name for name in table_names if name.startswith('list_total_qyer')),
        key=lambda name: name.split('_')[-1])

    for each_table_name in list_tables:
        get_per_table_task_info(each_table_name)
def insert_task_data(data, _count):
    """Insert picture-detection tasks, retrying up to three times.

    Args:
        data: Sequence of ``(city_id, city_grade, poi_id, pic_name)`` tuples.
        _count: Running total supplied by the caller; only used in the log
            message.

    Failures are logged rather than raised; after the last failed attempt an
    error is logged and the function returns ``None``.
    """
    insert_sql = '''INSERT IGNORE INTO pic_detect_task (city_id, city_grade, poi_id, pic_name) VALUES (%s, %s, %s, %s);'''

    max_retry_times = 3
    for _attempt in range(max_retry_times):
        try:
            conn = service_platform_pool.connection()
            try:
                cursor = conn.cursor()
                try:
                    _insert_count = cursor.executemany(insert_sql, data)
                    conn.commit()
                finally:
                    cursor.close()
            finally:
                # A failed attempt must not leak its connection.
                conn.close()
        except Exception as exc:
            logger.exception(msg="[run sql error]", exc_info=exc)
            continue
        logger.debug(
            "[insert data][now count: {}][insert data: {}][insert_ignore_count: {}]"
            .format(_count, len(data), _insert_count))
        break
    else:
        # Every attempt failed; make the data loss visible in the log.
        logger.error(
            "[insert data failed][now count: {}][insert data: {}][retry: {}]"
            .format(_count, len(data), max_retry_times))
Exemplo n.º 12
0
def insert_data(limit=1000):
    """Copy the next batch of rows from every ``view_final_*`` view.

    For each view (ordered by the last ``_``-separated name segment) the
    function reads the stored position with ``get_seek``, creates the target
    table with ``create_table``, copies up to ``limit`` rows whose time column
    is ``>=`` that position into ``<final_database>.<type>_final_<tag>`` and
    records the largest time seen with ``update_seek_table``.

    Args:
        limit: Maximum number of rows copied per view in one call.

    Raises:
        TypeError: If a view's type segment is not one of ``hotel``, ``attr``,
            ``rest`` or ``total``.
    """
    update_time_template = '''SELECT {0}
    FROM {1}
    WHERE {0} >= '{2}'
    ORDER BY {0}
    LIMIT {3};'''
    replace_template = '''REPLACE INTO {1}.{2} SELECT *
    FROM {3}
    WHERE {0} >= '{4}'
    ORDER BY {0}
    LIMIT {5};'''

    logger.debug("start insert data")
    logger.debug("get all view name")

    local_conn = service_platform_pool.connection()
    try:
        local_cursor = local_conn.cursor()
        try:
            local_cursor.execute('''SELECT TABLE_NAME
    FROM information_schema.VIEWS
    WHERE TABLE_SCHEMA = 'ServicePlatform';''')
            view_names = [row[0] for row in local_cursor.fetchall()]
        finally:
            local_cursor.close()

        # Views must be handled in the order of their tag (last name segment).
        view_list = sorted(
            (name for name in view_names if name.startswith('view_final_')),
            key=lambda name: name.split('_')[-1])

        for each_view_final in view_list:
            start = time.time()
            u_time = get_seek(each_view_final)

            try:
                _, _, view_type, _, view_tag = each_view_final.split('_')
            except ValueError:
                # The name does not have exactly five segments.
                logger.error('[Unknown View Final: {}]'.format(each_view_final))
                continue

            create_table(view_type, view_tag)
            to_table_name = "{}_final_{}".format(view_type, view_tag)
            if view_type not in ('hotel', 'attr', 'rest', 'total'):
                raise TypeError("Unknown Type: {}".format(view_type))

            # NOTE(review): the probe below and the REPLACE ... SELECT are two
            # separate statements, so they may see different rows; and because
            # the filter is ">=", more than ``limit`` rows sharing one
            # timestamp would presumably stall the seek -- confirm.
            update_time_sql = update_time_template.format(
                time_key[view_type], each_view_final, u_time, limit)
            local_cursor = local_conn.cursor()
            try:
                line_count = local_cursor.execute(update_time_sql)
                if line_count == 0:
                    continue
                # Largest time in this batch becomes the next seek position.
                final_update_time = max(
                    row[0] for row in local_cursor.fetchall())
            finally:
                local_cursor.close()

            # Replace into the final table.
            query_sql = replace_template.format(
                time_key[view_type], final_database, to_table_name,
                each_view_final, u_time, limit)
            local_cursor = local_conn.cursor()
            try:
                try:
                    replace_count = local_cursor.execute(query_sql)
                except Exception as e:
                    logger.exception(
                        msg="[table_name: {}][error_sql: {}]".format(
                            each_view_final, query_sql),
                        exc_info=e)
                    continue
                local_conn.commit()
            finally:
                local_cursor.close()

            update_seek_table(each_view_final, final_update_time)
            logger.debug(
                "[insert data][to: {}][from: {}][update_time: {}][final_update_time: {}][limit: {}][line_count: {}]["
                "replace_count: {}][takes: {}]".format(to_table_name,
                                                       each_view_final, u_time,
                                                       final_update_time, limit,
                                                       line_count, replace_count,
                                                       time.time() - start))
    finally:
        # Hand the shared connection back on every exit path, including the
        # TypeError above and failures inside the helpers.
        local_conn.close()
Exemplo n.º 13
0
def _fetch_duplicate_map_info_rows(task_table, query_sql, params):
    """Run the lookup query; return ``(matched, supplement_field rows)``."""
    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            matched = cursor.execute(query_sql, params)
            rows = [
                (task_table, line[0], line[1],
                 json.dumps({'address': line[2]}))
                for line in cursor.fetchall()
                if is_legal(line[2])
            ]
        finally:
            cursor.close()
    finally:
        conn.close()
    return matched, rows


def _insert_supplement_fields(data):
    """Insert ``map_info`` tasks into ``supplement_field``; return the count."""
    conn = service_platform_pool.connection()
    try:
        cursor = conn.cursor()
        try:
            inserted = cursor.executemany(
                '''INSERT IGNORE INTO supplement_field (`table_name`, `type`, `source`, `sid`, `other_info`) VALUES (%s, 'map_info', %s, %s, %s)''',
                data)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return inserted


def insert_error_map_info_task(duplicate_map_info_set, task_table, task_type):
    """Queue ``map_info`` supplement tasks for rows with duplicated coordinates.

    The set is processed in chunks of 500. For each chunk the rows of
    ``task_table`` whose ``map_info`` is in the chunk are looked up, rows whose
    address fails ``is_legal`` are dropped, and the rest are inserted into
    ``supplement_field`` with the address stored as JSON. Both the lookup and
    the insert are attempted up to four times; failures are logged, not raised.

    Args:
        duplicate_map_info_set: Collection of duplicated ``map_info`` values.
        task_table: Table to look the rows up in. It is interpolated into the
            SQL text, so it must come from a trusted source.
        task_type: ``'hotel'`` selects ``source_id`` as the id column;
            ``'attr'``, ``'shop'``, ``'rest'`` and ``'total'`` select ``id``.
            Any other value is logged and nothing is queried or inserted.
    """
    # TODO: the qyer tables are small enough for a full scan today; revisit
    # this approach before adding larger tables.
    logger.debug("[total duplicate map_info set][len: {}]".format(
        len(duplicate_map_info_set)))

    if task_type == 'hotel':
        id_column = 'source_id'
    elif task_type in ('attr', 'shop', 'rest', 'total'):
        id_column = 'id'
    else:
        # Previously this spun through every retry, opening and leaking a
        # connection each time, and then inserted nothing.
        logger.warning("[unknown task_type: {}][skip]".format(task_type))
        return

    query_template = '''SELECT
          source,
          {},
          address
        FROM {}
        WHERE map_info IN ({});'''

    _count = 0
    for duplicate_map_info in chunks(list(duplicate_map_info_set), 500):
        _count += 500
        logger.debug("[duplicate map_info][now: {}]".format(_count))

        data = []
        for retries_left in range(3, -1, -1):
            try:
                legal_map_info = [
                    str(item) for item in duplicate_map_info
                    if map_info_legal(item)
                ]
                # "IN ()" is not valid SQL; an empty chunk has nothing to do.
                if legal_map_info:
                    query_sql = query_template.format(
                        id_column, task_table,
                        ",".join(["%s"] * len(legal_map_info)))
                    # Rows are only kept from a fully successful attempt, so a
                    # retry cannot append duplicates of a partial read.
                    _res, data = _fetch_duplicate_map_info_rows(
                        task_table, query_sql, legal_map_info)
                    logger.debug(
                        "[get duplicate map_info task][len: {}]".format(_res))
                break
            except Exception as exc:
                logger.exception(
                    msg="[get duplicate map_info task error][retry_times: {}]"
                    .format(retries_left),
                    exc_info=exc)

        if not data:
            continue

        for retries_left in range(3, -1, -1):
            try:
                _res = _insert_supplement_fields(data)
                logger.debug(
                    "[get duplicate map_info task][len: {}]".format(_res))
                break
            except Exception as exc:
                logger.exception(
                    msg="[insert supplement filed][retry_times: {}]".format(
                        retries_left),
                    exc_info=exc)
Exemplo n.º 14
0
def insert_data(limit=1000):
    """Copy the next batch of rows from every ``images_*`` table.

    For each table (ordered by the last ``_``-separated name segment) the
    function reads the stored id position with ``get_seek``, creates the target
    table with ``create_table``, copies up to ``limit`` rows with ``id`` greater
    than that position into
    ``<final_database>.<image_type>_images_final_<tag>`` and records the largest
    id seen with ``update_seek_table``.

    Args:
        limit: Maximum number of rows copied per table in one call.

    Raises:
        TypeError: If a table's type segment is not one of ``hotel``, ``attr``,
            ``rest`` or ``total``.
    """
    get_id_template = '''SELECT id
    FROM {0}
    WHERE id > '{1}'
    ORDER BY id
    LIMIT {2};'''
    poi_replace_template = '''REPLACE INTO {0}.{1}
(file_name, source, sid, url, pic_size, bucket_name, url_md5, pic_md5, `use`, part, date)
  SELECT
    file_name,
    source,
    sid,
    url,
    pic_size,
    bucket_name,
    url_md5,
    pic_md5,
    `use`,
    part,
    date
  FROM
    {2} where id > {3} ORDER BY id LIMIT {4};;'''
    hotel_replace_template = '''REPLACE INTO {0}.{1} (source, source_id, pic_url, pic_md5, part, hotel_id, status, update_date, size, flag, file_md5)
  SELECT
    source,
    source_id,
    pic_url,
    pic_md5,
    part,
    hotel_id,
    status,
    update_date,
    size,
    flag,
    file_md5
  FROM
    {2}
  WHERE id > {3}
  ORDER BY id
  LIMIT {4};'''

    logger.debug("start insert data")
    logger.debug("get all table name")

    local_conn = service_platform_pool.connection()
    try:
        local_cursor = local_conn.cursor()
        try:
            local_cursor.execute('''SELECT TABLE_NAME
    FROM information_schema.TABLES
    WHERE TABLE_SCHEMA = 'ServicePlatform';''')
            table_names = [row[0] for row in local_cursor.fetchall()]
        finally:
            local_cursor.close()

        # Tables must be handled in the order of their tag (last name segment).
        table_list = sorted(
            (name for name in table_names if name.startswith('images_')),
            key=lambda name: name.split('_')[-1])

        for each_table_final in table_list:
            start = time.time()
            seek = get_seek(each_table_final)

            try:
                _, task_type, _, task_tag = each_table_final.split('_')
            except ValueError:
                # The name does not have exactly four segments.
                logger.error(
                    '[Unknown Task Final: {}]'.format(each_table_final))
                continue

            create_table(task_type, task_tag)
            if task_type not in ('hotel', 'attr', 'rest', 'total'):
                raise TypeError("Unknown Type: {}".format(task_type))

            to_table_name = "{}_images_final_{}".format(
                image_type_dict[task_type], task_tag)

            get_id_sql = get_id_template.format(each_table_final, seek, limit)
            local_cursor = local_conn.cursor()
            try:
                line_count = local_cursor.execute(get_id_sql)
                if line_count == 0:
                    continue
                # Largest id in this batch becomes the next seek position.
                final_seek = max(row[0] for row in local_cursor.fetchall())
            finally:
                local_cursor.close()

            # Hotel image tables have their own column layout; every other
            # accepted type ('attr', 'rest', 'total') shares the POI layout.
            if task_type == 'hotel':
                replace_template = hotel_replace_template
            else:
                replace_template = poi_replace_template
            query_sql = replace_template.format(
                final_database, to_table_name, each_table_final, seek, limit)

            local_cursor = local_conn.cursor()
            try:
                try:
                    replace_count = local_cursor.execute(query_sql)
                except Exception as e:
                    logger.exception(
                        msg="[table_name: {}][error_sql: {}]".format(
                            each_table_final, query_sql),
                        exc_info=e)
                    continue
                local_conn.commit()
            finally:
                local_cursor.close()

            update_seek_table(each_table_final, final_seek)
            logger.debug(
                "[insert data][to: {}][from: {}][seek: {}][final_seek: {}][limit: {}][line_count: {}]["
                "replace_count: {}][takes: {}]".format(to_table_name,
                                                       each_table_final, seek,
                                                       final_seek, limit,
                                                       line_count, replace_count,
                                                       time.time() - start))
    finally:
        # Hand the shared connection back on every exit path, including the
        # TypeError above and failures inside the helpers.
        local_conn.close()
Exemplo n.º 15
0
def _replace_hotel_batch(delete_source, delete_sid, data):
    """Delete the batch's existing Mongo documents, then insert the new ones."""
    try:
        hotel_collections.delete_many({
            "source": delete_source,
            "source_id": {
                "$in": delete_sid
            }
        })
        hotel_collections.insert_many(data)
    except BulkWriteError as bwe:
        logger.exception(
            msg="[bwe error][bwe details: {}]".format(bwe.details))
    except Exception as exc:
        logger.exception(msg="[insert data error]", exc_info=exc)


def insert_hotel_data():
    """Load hotel detail rows from MySQL into ``hotel_collections``.

    Every ``detail_hotel_*`` table in the ``ServicePlatform`` schema whose last
    name segment is not ``test`` and sorts after ``20170928d`` is streamed
    through ``MysqlSource``. Each row gets a GeoJSON-style ``loc`` point built
    from ``map_info`` (``"<lng>,<lat>"``) and is written in batches of 10000:
    existing documents with the same source / source_id are deleted first,
    then the batch is inserted. Write errors are logged, not raised.
    """
    logger.debug("start prepare mongo data")
    logger.debug("get all table name")

    local_conn = service_platform_pool.connection()
    try:
        local_cursor = local_conn.cursor()
        try:
            local_cursor.execute('''SELECT TABLE_NAME
        FROM information_schema.TABLES
        WHERE TABLE_SCHEMA = 'ServicePlatform';''')
            table_names = [row[0] for row in local_cursor.fetchall()]
        finally:
            local_cursor.close()
    finally:
        # Release the connection before the long-running load starts.
        local_conn.close()

    # Tables must be handled in the order of their tag (last two segments).
    table_list = sorted(
        (name for name in table_names
         if name.startswith('detail_hotel_')
         and name.split('_')[-1] != "test"
         and name.split('_')[-1] > "20170928d"),
        key=lambda name: name.split('_')[-2:])

    for each_table_name in table_list:
        _count = 0
        data = []
        delete_sid = []
        # NOTE(review): only the most recent row's source is used for the
        # delete, which assumes one source per table -- confirm.
        delete_source = ""
        sql = '''SELECT
          source,
          source_id,
          hotel_name,
          hotel_name_en,
          map_info,
          address,
          source_city_id
        FROM {};'''.format(each_table_name)
        for each_data in MysqlSource(data_db,
                                     table_or_query=sql,
                                     size=10000,
                                     is_table=False,
                                     is_dict_cursor=True):
            try:
                lng, lat = each_data['map_info'].split(',')
                coordinates = [float(lng), float(lat)]
            except (AttributeError, ValueError):
                # A NULL or malformed map_info used to abort the whole load
                # and drop the pending batch; skip just this row instead.
                logger.warning(
                    "[skip row][illegal map_info: {}][table: {}][source_id: {}]"
                    .format(each_data['map_info'], each_table_name,
                            each_data.get('source_id')))
                continue
            each_data["loc"] = {
                "type": "Point",
                "coordinates": coordinates
            }
            data.append(each_data)

            delete_source = each_data["source"]
            delete_sid.append(each_data["source_id"])

            _count += 1
            if _count % 10000 == 0:
                _replace_hotel_batch(delete_source, delete_sid, data)
                data = []
                delete_sid = []
                logger.debug("[insert_data][table: {}][count: {}]".format(
                    each_table_name, _count))

        # Flush the final partial batch.
        if data:
            _replace_hotel_batch(delete_source, delete_sid, data)
        logger.debug("[insert_data][table: {}][count: {}]".format(
            each_table_name, _count))