Exemplo n.º 1
0
def get_task():
    """Stream ``ota_location_bak_1215`` and hand the rows to ``update_sql``.

    Every fetched row is converted to a list and ``encode(sid)`` is inserted
    at index 1 (directly after ``source``, before the original ``sid``).
    Rows are flushed to ``update_sql`` in batches of 1000; whatever remains
    (possibly an empty list) is flushed once more at the end.
    """
    sql = '''SELECT
  source,
  sid,
  suggest_type,
  suggest,
  city_id,
  country_id,
  s_city,
  s_region,
  s_country,
  s_extra,
  label_batch,
  others_info
FROM ota_location_bak_1215;'''
    rows = MysqlSource(poi_ori_config, table_or_query=sql,
                       size=2000, is_table=False,
                       is_dict_cursor=False)
    batch = []
    for seen, row in enumerate(rows, 1):
        expanded = list(row)
        expanded.insert(1, encode(row[1]))
        batch.append(expanded)
        if len(batch) != 1000:
            continue
        # A full batch: report progress, flush, and start a new list.
        logger.info("[count: {}]".format(seen))
        update_sql(batch)
        batch = []
    update_sql(batch)
Exemplo n.º 2
0
def move_img_data(source_table_name):
    """Copy the image rows of ``source_table_name`` through ``insert_data``.

    Rows are read with ``MysqlSource`` and passed to ``insert_data`` in
    batches of 1000; a trailing partial batch is inserted only when it is
    non-empty.

    :param source_table_name: table name substituted into the ``FROM``
        clause of the query.
    """
    query_sql = '''SELECT
  file_name,
  source,
  sid,
  url,
  pic_size,
  bucket_name,
  url_md5,
  pic_md5,
  `use`,
  part,
  date,
  info
FROM {};'''.format(source_table_name)
    rows = MysqlSource(poi_ori_config,
                       table_or_query=query_sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=True)
    batch = []
    for moved, row in enumerate(rows, 1):
        batch.append(row)
        if len(batch) != 1000:
            continue
        insert_data(batch)
        batch = []
        logger.debug("[table_name: {}][move data][count: {}]".format(
            source_table_name, moved))
    if batch:
        insert_data(batch)
Exemplo n.º 3
0
def move_img_data():
    """Copy every ``rest_bucket_relation`` row through ``insert_data``.

    Rows are read with ``MysqlSource`` and passed to ``insert_data`` in
    batches of 1000; a trailing partial batch is inserted only when it is
    non-empty.
    """
    query_sql = '''SELECT
  file_name,
  sid,
  url,
  pic_size,
  bucket_name,
  url_md5,
  pic_md5,
  `use`,
  source,
  rank,
  fixrank,
  status,
  date
FROM rest_bucket_relation;'''
    rows = MysqlSource(poi_ori_config,
                       table_or_query=query_sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=True)
    batch = []
    for moved, row in enumerate(rows, 1):
        batch.append(row)
        if len(batch) != 1000:
            continue
        insert_data(batch)
        batch = []
        logger.debug("[move data][count: {}]".format(moved))
    if batch:
        insert_data(batch)
Exemplo n.º 4
0
def _get_per_table_task_info(table_name):
    """Build image task tuples for one table and pass them to ``insert_task_data``.

    Joins ``BaseDataFinal.poi_images`` with ``table_name`` and, for each
    resulting row, emits ``(city_id, grade, 'source###sid', file_name)`` where
    the grade is looked up in the module-level ``cid2grade`` mapping. Tuples
    are flushed in batches of 2000 together with the running row count; the
    remainder (possibly empty) is flushed at the end.

    The module-level ``offset`` is used as the ``LIMIT`` start of the query
    and is incremented once per processed row.

    :param table_name: table joined against ``BaseDataFinal.poi_images``.
    """
    global offset
    sql = '''SELECT
            {0}.city_id                    AS poi_city_id,
            {0}.source                     AS poi_source,
            sid                            AS poi_sid,
            file_name                      AS pic_name
          FROM BaseDataFinal.poi_images
            JOIN {0}
              ON BaseDataFinal.poi_images.source = {0}.source AND
                 BaseDataFinal.poi_images.sid = {0}.source_id
          WHERE city_id != 'NULL' and BaseDataFinal.poi_images.`use` != '0'
          LIMIT {1},999999999;'''.format(table_name, offset)
    rows = MysqlSource(service_platform_conf,
                       table_or_query=sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=True)
    pending = []
    handled = 0
    for row in rows:
        city = row['poi_city_id']
        grade = cid2grade[city]
        poi_key = '###'.join((row['poi_source'], row['poi_sid']))
        pending.append((city, grade, poi_key, row['pic_name']))

        handled += 1
        offset += 1
        if len(pending) == 2000:
            insert_task_data(pending, handled)
            pending = []
    insert_task_data(pending, handled)
Exemplo n.º 5
0
def to_data(table_name):
    """Collect first-image md5 records from ``detail_hotel_<table_name>``.

    For every row whose ``others_info`` JSON contains a ``first_img`` value
    accepted by ``is_legal``, a ``(source, source_id, encode(first_img))``
    tuple is queued. Queued tuples go to ``insert_db`` in batches of 1000 and
    the remainder (possibly empty) is inserted at the end. The module-level
    ``offset`` is incremented for every row read, including skipped ones.

    Any exception raised while reading or inserting is logged and swallowed.

    :param table_name: suffix of the ``detail_hotel_`` table; also forwarded
        to ``insert_db``.
    """
    global offset
    select_sql = '''SELECT
  source,
  source_id,
  others_info
FROM detail_hotel_{0}'''.format(table_name)
    try:
        batch = []
        rows = MysqlSource(db_config=config,
                           table_or_query=select_sql,
                           size=10000,
                           is_table=False,
                           is_dict_cursor=True)
        for row in rows:
            offset += 1
            raw_info = row['others_info']
            if not raw_info:
                continue
            parsed_info = json.loads(raw_info)
            if 'first_img' not in parsed_info:
                continue
            img_url = parsed_info['first_img']
            if not is_legal(img_url):
                continue

            url_md5 = encode(img_url)
            batch.append((row['source'], row['source_id'], url_md5))
            # The batch is reset on every flush, so it never exceeds 1000.
            if len(batch) == 1000:
                insert_db(table_name, batch)
                batch = []
        insert_db(table_name, batch)
    except Exception as exc:
        logger.exception(msg="[入库出现异常]", exc_info=exc)
Exemplo n.º 6
0
def _img_ori(_poi_type):
    """Schedule ``_update_per_uid_img`` for every row read from ``table_name``.

    Reads ``id, image_list, first_image, official`` ordered by ``id``,
    starting at the module-level ``offset``, and submits one
    ``_update_per_uid_img`` call per row to the module-level ``pool``.

    Relies on module-level state: ``table_name`` (read only), ``offset``
    (read, then advanced by 1000 after each 1000 submitted rows), ``data``
    (rebound to an empty list after each 1000 submitted rows) and ``pool``.

    :param _poi_type: forwarded unchanged to ``_update_per_uid_img``.
    """
    global data
    global offset
    # ``table_name`` is not a parameter; it is resolved from module scope.
    query_sql = '''SELECT
  id,
  image_list,
  first_image,
  official
FROM {}
ORDER BY id
LIMIT {}, 99999999999999;'''.format(table_name, offset)

    _count = 0
    cache = []
    # Read the whole result set into memory before any task is submitted.
    for _uid, _old_img_list, _old_first_img, _official in MysqlSource(poi_ori_config, table_or_query=query_sql,
                                                                      size=500, is_table=False,
                                                                      is_dict_cursor=False):
        cache.append((_uid, _old_img_list, _old_first_img, _official))

    for _uid, _old_img_list, _old_first_img, _official in cache:
        pool.apply_async(_update_per_uid_img, (_uid, _poi_type, _old_img_list, _old_first_img, _official))
        _count += 1
        if _count % 1000 == 0:
            # Every 1000 submissions: wait for the pool, flush, reset the
            # shared buffer and advance the resume offset.
            pool.join()
            update_img()
            data = []
            offset += 1000
        # NOTE(review): this call runs on every iteration, not only on the
        # 1000-row boundary; it may have been intended to sit outside the
        # loop -- confirm before changing.
        update_img()
    # Final wait and flush for the rows submitted after the last boundary.
    pool.join()
    update_img()
Exemplo n.º 7
0
def get_tasks():
    """Yield the ``uid`` of every ``hotel`` row, in ascending ``uid`` order."""
    query_sql = '''SELECT uid FROM hotel ORDER BY uid;'''

    rows = MysqlSource(db_config=spider_data_base_data_config,
                       table_or_query=query_sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=False)
    for row in rows:
        # Rows are single-column sequences; the uid is the first field.
        yield row[0]
Exemplo n.º 8
0
def get_file_name():
    """Yield ``(source, source_id, pic_md5)`` rows from ``hotel_images``.

    Only rows with ``part = '20171127a'`` and a NULL ``info`` are selected.
    """
    query_sql = '''SELECT source, source_id, pic_md5
FROM hotel_images
WHERE part = '20171127a' AND info IS NULL;'''
    rows = MysqlSource(poi_ori_config,
                       table_or_query=query_sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=False)
    for row in rows:
        yield row
Exemplo n.º 9
0
def main():
    """Run ``detect_table`` on every table of the ``tmp`` schema.

    Table names come from ``information_schema.TABLES``; each one is logged
    before it is processed.
    """
    _sql = '''SELECT TABLE_NAME
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = 'tmp';'''
    table_rows = MysqlSource(db_config=spider_task_data_config,
                             table_or_query=_sql,
                             size=10000, is_table=False,
                             is_dict_cursor=False)
    for row in table_rows:
        current_table = row[0]
        logger.info("[start][table_name: {}]".format(current_table))
        detect_table(table_name=current_table)
Exemplo n.º 10
0
def get_tasks():
    """Yield ``ota_location`` rows of source ``daodao`` for a fixed city list.

    The city ids are hard-coded in the query; rows are requested from
    ``MysqlSource`` with ``is_dict_cursor=True`` and yielded unchanged.
    """
    query_sql = '''SELECT *
FROM ota_location
WHERE source = 'daodao' AND city_id in ('11444','60177','12344','60178','10436','60179','60180','30118','30140','50053','60181','10648','11424','60182','60183','50117','20096');'''

    rows = MysqlSource(db_config=source_info_config,
                       table_or_query=query_sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=True)
    for row in rows:
        yield row
Exemplo n.º 11
0
def get_tasks():
    """Yield up to 10 ``qyer`` rows of ``ota_location`` that carry an origin key.

    A row qualifies when ``others_info`` has a non-NULL ``$.from`` or
    ``$.form`` JSON path; rows are yielded unchanged.
    """
    query_sql = '''SELECT *
FROM ota_location
WHERE source = 'qyer' AND
      (json_extract(others_info, '$.from') IS NOT NULL OR json_extract(others_info, '$.form') IS NOT NULL) limit 10;'''

    rows = MysqlSource(db_config=source_info_config,
                       table_or_query=query_sql,
                       size=10000, is_table=False,
                       is_dict_cursor=True)
    for row in rows:
        yield row
Exemplo n.º 12
0
def get_task():
    """Feed every ``mioji_id`` of ``filter_data_already_online`` to ``update_sql``.

    Ids are passed on in batches of 2000; the remainder (possibly an empty
    list) is passed on once more at the end.
    """
    sql = '''SELECT mioji_id FROM filter_data_already_online;'''
    rows = MysqlSource(poi_ori_config, table_or_query=sql,
                       size=10000, is_table=False,
                       is_dict_cursor=True)
    id_batch = []
    for row in rows:
        id_batch.append(row['mioji_id'])
        if len(id_batch) != 2000:
            continue
        update_sql(id_batch)
        id_batch = []
    update_sql(id_batch)
Exemplo n.º 13
0
def get_tasks(city_id=None, config=None):
    """Yield ``ota_location`` rows of source ``daodao`` for the given cities.

    :param city_id: iterable of city ids to match. An empty iterable yields
        nothing. A non-iterable value (including the default ``None``) raises
        ``TypeError`` on first iteration, as before.
    :param config: database configuration forwarded to ``MysqlSource``.
    :return: generator over the rows produced by ``MysqlSource``.
    """
    city_ids = tuple(city_id)
    if not city_ids:
        # ``IN ()`` is not valid SQL and nothing could match anyway.
        return

    # Build the IN list explicitly instead of relying on ``str(tuple)``:
    # a one-element tuple renders as ``('x',)`` whose trailing comma is a
    # SQL syntax error. For two or more ids the text is unchanged.
    # NOTE(review): values are interpolated via repr(), not escaped by the
    # driver -- only pass trusted ids.
    in_list = '({})'.format(', '.join(repr(_cid) for _cid in city_ids))
    query_sql = '''SELECT *
FROM ota_location
WHERE source = 'daodao' AND city_id in {0};'''.format(in_list)

    for _l in MysqlSource(db_config=config,
                          table_or_query=query_sql,
                          size=10000,
                          is_table=False,
                          is_dict_cursor=True):
        yield _l
Exemplo n.º 14
0
def get_task():
    """Call ``get_file`` for every ``qyer`` sid with more than 90 used images.

    The sids come from ``poi_images`` rows with ``use = 1``, grouped by
    ``sid``.
    """
    sql = '''SELECT sid
FROM poi_images
WHERE source = 'qyer' AND `use` = 1
GROUP BY sid
HAVING count(*) > 90;'''
    rows = MysqlSource(poi_ori_config,
                       table_or_query=sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=True)
    for row in rows:
        get_file(row['sid'])
Exemplo n.º 15
0
def detect_table(table_name):
    """Find rows of ``table_name`` that lie too far from their assigned city.

    For every row with a city id, the row's ``map_info`` is compared with the
    city's map info from ``get_c_info()``. Rows whose ``get_distance`` result
    exceeds 50 are collected as ``(source, source_id, city_id)`` and handed to
    ``get_sql`` in batches of 200, together with two output files under
    ``SQL_PATH``. Once both files are closed, ``update_table`` is called with
    their names.

    Rows are skipped when the city has no map info or when ``get_distance``
    returns ``-1``.

    :param table_name: table to scan; also used to name the two output files.
    """
    c_dict = get_c_info()

    _sql = '''SELECT map_info, city_id, source, source_id
FROM {}
WHERE city_id != 'NULL' AND city_id IS NOT NULL;'''.format(table_name)

    offset = 0
    error = 0
    new_data = []

    update_sql_name = 'no_cid_hotel|_|{}.sql'.format(table_name)
    del_sql_name = 'no_cid_hotel_del|_|{}.sql'.format(table_name)

    # Context managers guarantee both files are closed even when the scan
    # raises part-way through.
    with open(os.path.join(SQL_PATH, update_sql_name), 'w') as f_res, \
            open(os.path.join(SQL_PATH, del_sql_name), 'w') as f_del:
        for line in MysqlSource(db_config=spider_task_data_config,
                                table_or_query=_sql,
                                size=10000, is_table=False,
                                is_dict_cursor=True):
            offset += 1
            _map_info = line['map_info']
            _city_id = line['city_id']
            _source = line['source']
            _source_id = line['source_id']
            _c_map_info = c_dict.get(_city_id)

            if not _c_map_info:
                continue

            dist = get_distance(_c_map_info, _map_info)

            if dist == -1:
                continue

            # Reuse the distance computed above rather than recomputing it.
            if dist > 50:
                error += 1

                new_data.append((_source, _source_id, _city_id))
                if len(new_data) == 200:
                    get_sql(table_name=table_name, res_f=f_res, res_del_f=f_del, data=new_data)
                    new_data = []
                logger.info(
                    "[error_distance][offset: {}][error: {}][dist: {}][source: {}][source_id: {}][city_id: {}]".format(
                        offset, error, dist, _source, _source_id, _city_id))
        if new_data:
            get_sql(table_name=table_name, res_f=f_res, res_del_f=f_del, data=new_data)

    update_table(u_sql_name=update_sql_name, d_sql_name=del_sql_name)
Exemplo n.º 16
0
def get_tasks():
    """Yield tasks for ``poi_images`` rows that have no ``info`` yet.

    Rows are read ordered by ``source, sid`` starting at the module-level
    ``offset``; the module-level ``pre_offset`` is incremented for every row
    read, whether or not it produces a task.

    Only rows whose ``bucket_name`` contains ``'rest'`` (and not ``'attr'``)
    produce a task, yielded as
    ``(source, file_name, pic_md5, 'mioji-rest', 'poi')``. Every other
    bucket, including attr and shop buckets, is currently skipped.
    """
    # An earlier variant read hotel_images and yielded
    # (..., 'mioji-hotel', 'hotel'); it is disabled in favour of poi_images.
    global offset
    global pre_offset
    query_sql = '''SELECT
  source,
  file_name,
  pic_md5,
  bucket_name,
  info
FROM poi_images
ORDER BY source, sid
LIMIT {}, 999999999999999;'''.format(offset)

    rows = MysqlSource(
        db_config=base_data_final_config,
        table_or_query=query_sql,
        size=10000,
        is_table=False,
        is_dict_cursor=False)
    for img_source, img_name, img_md5, bucket, info in rows:
        pre_offset += 1
        if info:
            continue
        # 'attr' is tested first, so a bucket matching both is skipped.
        if 'attr' in bucket or 'rest' not in bucket:
            continue
        yield img_source, img_name, img_md5, 'mioji-rest', 'poi'
Exemplo n.º 17
0
def get_tasks():
    """Yield ``hotel`` uids in ascending order, starting at ``offset``.

    The module-level ``offset`` supplies the ``LIMIT`` start of the query and
    the module-level ``pre_offset`` is incremented once per yielded uid.
    """
    global offset
    global pre_offset
    query_sql = '''SELECT uid
FROM hotel
ORDER BY uid
LIMIT {}, 999999999999999;'''.format(offset)

    rows = MysqlSource(db_config=spider_data_base_data_config,
                       table_or_query=query_sql,
                       size=10000, is_table=False,
                       is_dict_cursor=False)
    for row in rows:
        pre_offset += 1
        yield row[0]
Exemplo n.º 18
0
def get_task():
    """Merge crawled qyer counters into the count JSON of selected attractions.

    Rows of ``chat_attraction`` are selected by a hard-coded id list. For each
    row the qyer id is parsed out of the row's qyer URL and looked up in the
    mapping returned by ``generate_qyer_url_id()``. When an entry is found, the
    ``'qyer'`` key of the three count JSON objects is overwritten with the
    crawled values. Every row, matched or not, is printed and then passed to
    ``update_sql()`` as ``(json, json, json, uid)``.

    Raises:
        IndexError: if a row's URL does not contain a qyer poi id.
    """
    g_dict = generate_qyer_url_id()
    # NOTE(review): the literal below contains line breaks that fall inside
    # some quoted ids; such ids cannot match a real row. The text is kept
    # exactly as found - confirm whether those breaks are intended.
    sql = '''SELECT
  id,
  city_id,
  commentcount,
  beentocount,
  plantocount,
  json_extract(url, '$.qyer')
FROM chat_attraction
WHERE id IN
      ('v219493','v219498','v219500','v219745','v219897','v219918','v219932','v220018','v220094','v220160','v220315','v220329','v220403','v220406','v220514','v220516','v220519','v220542','v220543','v220545','v220639','v220657','v220760','v220775','v220776','v220800','v220802','v220805','v220833','v220834','v220836','v220837','v220838','v220943','v220972','v220999','v221104','v221118','v221122','v221123','v221124','v221401','v221407','v221411','v221414','v221415','v221416','v221417','v221419','v221420','v221652','v221848','v221874','v221938','v221939','v222126','v222129','v222141','v222147','v222160','v222168','v222272','v222402','v222490','v222497','v222521','v222538','v222542','v222545','v222554','v222592','v222593','v222784','v222828','v223852','v223875','v223896','v223901','v223946','v223957','v223976','v224021','v224040','v224057','v224072','v224087','v224100','v224105','v224120','v224144','v224145','v224173','v224225','v224227','v224241','v224252','v224273','v224287','v224327','v224332','v224339','v224344','v224456','v224482','v224507','v224516','v224529','v224542','v224555','v224595','v224596','v224598','v224629','v224662','v224679','v224684','v224693','v224694','v224715','v224723','v224724','v224726','v224736','v224738','v224767','v224785','v224825','v224830','v224844','v224860','v224871','v224880','v224904','v224920','v224934','v224945','v224946','v224962','v224971','v224985','v225005','v225026','v225028','v225046','v225059','v225069','v225073','v225077','v225082','v225112','v225178','v225181','v225184','v225202','v225227','v225267','v225290','v225299','v225316','v225348','v225354','v225366','v225390','v225396','v225419','v225428','v225451','v225453','v225504','v225507','v225510','v225524','v225580','v225592','v225595','v225620','v225640','v225644','v225646','v225649','v225673','v225692','v225701','v225709','v225717','v225734','v225739','v225753','v225758','v225831','v225832','v225833','v225860','v225863','v225872','v225908','v225925','v225932','v225951','v2
25977','v225981','v225988','v225997','v226022','v226028','v226049','v226054','v226105','v226124','v226139','v226219','v226228','v226250','v226311','v226322','v226327','v226371','v226387','v226392','v226435','v226470','v226475','v226493','v226530','v226544','v226553','v226556','v226568','v226572','v226577','v226584','v226637','v226644','v226653','v226692','v226721','v226732','v226734','v226737','v226809','v226813','v226828','v226833','v226883','v226886','v226892','v226907','v226926','v226943','v226974','v226975','v226986','v226998','v227046','v227063','v227071','v227083','v227087','v227100','v227101','v227127','v227134','v227135','v227149','v227158','v227190','v227221','v227278','v227303','v227311','v227312','v227330','v227347','v227349','v227350','v227376','v227380','v227384','v227387','v227414','v227418','v227518','v227543','v227576','v227579','v227633','v227647','v227718','v227765','v227782','v227811','v227816','v227830','v227836','v227849','v227866','v227877','v227893','v227903','v227919','v227925','v227953','v227981','v228002','v228029','v228071','v228086','v228097','v228107','v228123','v228141','v228151','v228182','v228186','v228247','v228257','v228258','v228260','v228287','v228307','v228314','v228319','v228336','v228366','v228368','v228407','v228416','v228429','v228430','v228441','v228450','v228489','v228505','v228506','v228535','v228575','v228582','v228639','v228640','v228658','v228671','v228688','v228706','v228707','v228709','v228733','v228770','v228773','v228793','v228796','v228804','v228817','v228824','v228839','v228885','v228897','v228925','v228927','v228958','v228971','v228997','v229023','v229059','v229068','v229073','v229087','v229115','v229126','v229133','v229145','v229185','v229190','v229194','v229204','v229206','v229226','v229261','v229267','v229275','v229278','v229291','v229292','v229392','v229401','v229404','v229413','v229429','v229432','v229476','v229496','v229531','v229547','v229555','v229620','v229625','v229652','v229656','v229662','v229681','v2
29703','v229721','v229734','v229766','v229769','v229818','v229820','v229889','v229897','v229905','v229922','v229932','v229954','v229975','v229996','v230020','v230063','v230064','v230111','v230113','v230124','v230150','v230156','v230178','v230182','v230187','v230227','v230231','v230287','v230297','v230298','v230317','v230384','v230397','v230412','v230423','v248961','v249003','v246711','v246736','v246741','v246748','v246769','v246770','v246775','v246783','v246786','v246788','v246789','v246790','v246791','v246793','v246794','v246795','v246797','v246852','v246856','v246861','v246867','v246877','v246887','v246888','v246893','v246894','v246897','v246900','v246906','v246910','v246912','v246913','v246914','v246915','v246916','v246917','v246918','v246919','v246972','v246977','v246996','v247002','v247003','v247010','v247011','v247016','v247019','v247024','v247030','v247036','v247038','v247039','v247089','v247092','v247110','v247119','v247121','v247122','v247127','v247130','v247132','v247137','v247142','v247144','v247153','v247201','v247219','v247223','v247227','v247235','v247242','v247246','v247248','v247255','v247296','v247307','v247319','v247322','v247329','v247333','v247345','v247358','v247362','v247363','v247365','v247368','v247405','v247406','v247409','v247412','v247415','v247419','v247421','v247426','v247432','v247433','v247436','v247437','v247438','v247439','v247440','v247441','v247443','v247444','v247445','v247446','v247447','v247449','v247450','v247452','v247453','v247455','v247457','v247537','v247539','v247542','v247549','v247554','v247558','v247560','v247564','v247567','v247568','v247569','v247570','v247571','v247572','v247573','v247574','v247576','v247577','v247578','v247579','v247581','v247582','v247583','v247584','v247585','v247586','v247648','v247661','v247664','v247665','v247670','v247672','v247697','v247702','v247715','v247719','v247723','v247730','v247741','v247765','v247768','v247779','v247784','v247794','v247798','v247799','v247804','v247820','v247826','v2
47837','v247863','v247871','v247876','v247883','v247884','v247896','v247902','v247904','v247907','v247910','v247911','v247912','v247913','v247914','v247915','v247916','v247917','v247918','v247919','v247922','v247926','v247932','v247935','v247939','v248019','v248027','v248029','v248032','v248034','v248038','v248042','v248049','v248052','v248055','v248058','v248059','v248060','v248061','v248062','v248063','v248064','v248065','v248066','v248070','v248074','v248077','v248080','v248082','v248169','v248176','v248185','v248188','v248200','v248206','v248208','v248209','v248210','v248215','v248220','v248221','v248280','v248286','v248288','v248289','v248291','v248295','v248302','v248303','v248304','v248306','v248307','v248308','v248309','v248310','v248311','v248312','v248313','v248318','v248319','v248323','v248327','v248332','v248333','v246701','v246729','v246735','v246742','v246756','v246759','v246765','v246767','v246768','v246773','v246774','v246776','v246785','v246792','v246862','v246865','v246881','v246884','v246890','v246895','v246898','v246901','v246904','v246908','v246981','v247001','v247006','v247008','v247009','v247014','v247022','v247023','v247026','v247029','v247034','v247037','v247043','v247083','v247085','v247086','v247098','v247106','v247112','v247116','v247123','v247124','v247133','v247135','v247138','v247140','v247143','v247145','v247146','v247199','v247203','v247220','v247222','v247226','v247230','v247237','v247245','v247297','v247320','v247327','v247330','v247334','v247361','v247404','v247408','v247411','v247414','v247417','v247418','v247420','v247429','v247430','v247492','v247535','v247540','v247545','v247556','v247565','v247647','v247650','v247663','v247668','v247669','v247678','v247681','v247688','v247695','v247700','v247714','v247718','v247724','v247725','v247727','v247769','v247774','v247776','v247785','v247788','v247791','v247800','v247801','v247805','v247817','v247821','v247865','v247875','v247878','v247887','v247892','v247894','v247901','v247906','v2
47909','v247921','v247925','v247929','v247931','v247934','v247937','v247938','v247941','v248015','v248025','v248030','v248035','v248039','v248047','v248050','v248053','v248056','v248071','v248075','v248078','v248083','v248085','v248090','v248101','v248161','v248163','v248171','v248174','v248179','v248199','v248205','v248212','v248214','v248218','v248219','v248224','v248267','v248296','v248297','v248299','v248315','v248320','v248321','v248324','v248328','v248329','v248334','v223858','v223860','v223872','v223898','v223924','v223950','v223952','v223954','v223992','v224023','v224083','v224217','v224223','v224247','v224253','v224319','v224331','v224522','v224531','v224562','v224650','v224712','v224817','v224833','v224881','v224947','v224970','v224973','v224976','v224979','v225016','v225024','v225044','v225139','v225165','v225187','v225189','v225361','v225449','v225486','v225513','v225659','v225715','v225747','v225784','v225837','v225904','v225913','v226014','v226024','v226087','v226144','v226145','v226169','v226268','v226308','v226310','v226312','v226332','v226342','v226376','v226408','v226409','v226414','v226575','v226643','v226695','v226719','v226728','v226853','v226901','v227128','v227184','v227226','v227254','v227356','v227398','v227400','v227525','v227593','v227625','v227671','v227685','v227761','v227801','v227824','v227827','v227861','v227891','v227904','v227945','v228103','v228104','v228128','v228139','v228172','v228179','v228282','v228291','v228329','v228661','v228673','v228679','v228692','v228704','v228716','v228761','v228800','v228805','v228972','v229025','v229081','v229109','v229151','v229244','v229256','v229319','v229348','v229372','v229433','v229438','v229449','v229454','v229477','v229552','v229594','v229636','v229697','v229715','v229761','v229924','v229964','v229981','v230012','v230026','v230084','v230185','v230278','v230318','v230338','v230348','v230409','v231507','v231661','v231833','v232874','v233082','v233224','v233709','v233808','v234353','v235111','v2
35325','v236475','v237091','v237353','v237862','v237890','v238094','v238237','v239713','v239846','v241035','v241985','v242366','v242409','v242717','v242804','v243215','v243475','v243548','v243772','v246698','v246723','v246740','v246743','v246751','v246760','v246763','v246777','v246853','v246857','v246866','v246880','v246907','v246971','v246976','v246997','v247015','v247082','v247084','v247093','v247095','v247111','v247131','v247134','v247200','v247202','v247214','v247238','v247241','v247243','v247298','v247301','v247323','v247326','v247422','v247431','v247448','v247538','v247550','v247555','v247671','v247674','v247682','v247689','v247696','v247701','v247766','v247773','v247775','v247780','v247787','v247796','v247862','v247870','v247879','v247882','v247893','v247895','v247940','v248016','v248028','v248043','v248088','v248155','v248167','v248172','v248175','v248277','v248281','v248287','v246728','v246873','v246987','v247102','v247154','v247218','v247306','v247692','v248157','v248165','v248196','v245354','v245370','v245387','v245437','v245448','v245498','v245541','v245550','v245560','v245572','v245591','v245595','v245598','v245643','v245645','v245646','v245649','v245659','v245664','v245667','v245676','v245678','v245679','v245682','v245684','v245685','v245694','v245696','v245697','v245700','v245701','v245712','v245718','v245728','v245730','v245731','v245733','v245771','v245774','v245777','v245793','v245808','v245835','v245836','v245854','v245855','v245859','v245884','v245887','v245889','v245905','v245906','v245908','v245912','v245914','v245920','v245921','v245922','v245924','v245941','v245944','v245946','v245947','v245969','v245990','v246035','v246059','v246066','v246070','v246071','v246072','v246073','v246074','v246075','v246085','v246087','v246088','v246089','v246090','v246104','v246105','v246108','v246109','v246110','v246112','v246122','v246125','v246128','v246130','v246134','v246138','v246142','v246143','v246149','v246167','v246184','v246188','v246211','v246212','v2
46213','v246214','v246215','v246216','v246219','v246222','v246264','v246280','v246281','v246304','v246312','v246314','v246322','v246327','v246334','v246337','v246404','v246408','v246412','v246413','v246455','v246456','v246457','v246458','v246459','v246460','v246463','v246482','v246503','v246505','v246506','v246509','v246511','v246513','v246516','v246526','v246546','v246547','v246570','v246577','v246579','v246582','v246619','v246620','v246622','v246624','v246626','v246628','v246631','v246633','v246635','v246637','v246646','v246650','v246651','v246653','v246655','v246656','v246657','v246673','v246675','v246718','v246727','v246758','v246761','v246778','v246780','v246855','v246870','v246879','v246882','v246956','v246988','v246991','v246999','v247099','v247105','v247150','v247205','v247213','v247234','v247292','v247349','v247491','v247543','v247864','v247890','v247898','v247947','v248017','v248020','v248041','v248046','v248158','v248183','v248195','v248203','v248279','v248284','v245364','v245373','v245449','v245450','v245453','v245455','v245456','v245476','v245477','v245482','v245483','v245484','v245485','v245486','v245488','v245489','v245491','v245492','v245494','v245495','v245496','v245497','v245515','v245516','v245517','v245519','v245520','v245521','v245522','v245527','v245528','v245539','v245542','v245544','v245546','v245552','v245554','v245569','v245571','v245592','v245633','v245634','v245635','v245636','v245637','v245638','v245639','v245640','v245641','v245642','v245681','v245686','v245687','v245688','v245689','v245690','v245691','v245692','v245693','v245703','v245704','v245705','v245706','v245707','v245708','v245709','v245710','v245711','v245713','v245714','v245717','v245727','v245749','v245810','v245812','v245813','v245814','v245815','v245818','v245837','v245838','v245843','v245845','v245846','v245847','v245848','v245849','v245850','v245851','v245852','v245891','v245892','v245893','v245894','v245895','v245897','v245898','v245903','v245904','v245928','v245959','v2
45963','v245964','v245965','v245966','v245967','v245968','v245971','v246019','v246020','v246021','v246023','v246024','v246025','v246026','v246027','v246031','v246033','v246037','v246064','v246068','v246069','v246076','v246077','v246078','v246079','v246080','v246082','v246083','v246084','v246106','v246107','v246111','v246114','v246118','v246124','v246126','v246127','v246129','v246132','v246133','v246136','v246158','v246161','v246162','v246186','v246190','v246192','v246199','v246208','v246217','v246221','v246223','v246259','v246263','v246292','v246293','v246295','v246296','v246297','v246298','v246299','v246300','v246309','v246315','v246319','v246320','v246324','v246331','v246333','v246339','v246402','v246411','v246415','v246416','v246417','v246418','v246422','v246444','v246447','v246448','v246449','v246450','v246451','v246452','v246453','v246454','v246461','v246462','v246483','v246486','v246510','v246512','v246514','v246517','v246520','v246521','v246523','v246525','v246527','v246528','v246529','v246531','v246542','v246544','v246550','v246551','v246553','v246571','v246573','v246581','v246630','v246659','v246661','v246663','v246665','v246667','v245353','v245357','v245358','v245362','v245363','v245366','v245368','v245371','v245372','v245376','v245388','v245389','v245401','v245430','v245432','v245433','v245435','v245436','v245474','v245480','v245499','v245500','v245501','v245502','v245503','v245514','v245523','v245525','v245526','v245530','v245532','v245534','v245535','v245538','v245553','v245555','v245557','v245570','v245590','v245594','v245596','v245597','v245600','v245644','v245648','v245650','v245665','v245666','v245668','v245669','v245670','v245671','v245672','v245675','v245677','v245699','v245715','v245716','v245719','v245721','v245722','v245723','v245724','v245725','v245726','v245729','v245732','v245738','v245776','v245778','v245780','v245794','v245795','v245796','v245819','v245820','v245821','v245822','v245823','v245824','v245839','v245840','v245841','v245842','v2
45844','v245853','v245856','v245857','v245862','v245863','v245880','v245881','v245883','v245885','v245888','v245899','v245900','v245901','v245907','v245909','v245917','v245918','v245919','v245923','v245942','v245948','v245970','v245972','v245989','v246029','v246036','v246038','v246051','v246052','v246053','v246054','v246055','v246056','v246057','v246058','v246060','v246061','v246063','v246065','v246067','v246086','v246100','v246103','v246113','v246115','v246117','v246120','v246121','v246131','v246135','v246137','v246139','v246144','v246159','v246164','v246165','v246185','v246189','v246197','v246198','v246209','v246220','v246261','v246262','v246266','v246267','v246285','v246286','v246290','v246301','v246302','v246305','v246306','v246308','v246313','v246316','v246317','v246325','v246326','v246328','v246329','v246330','v246332','v246335','v246336','v246338','v246352','v246353','v246355','v246406','v246407','v246409','v246410','v246439','v246440','v246441','v246442','v246464','v246466','v246467','v246468','v246469','v246484','v246487','v246488','v246489','v246490','v246491','v246492','v246493','v246495','v246498','v246499','v246500','v246501','v246502','v246504','v246508','v246522','v246524','v246530','v246543','v246548','v246585','v246623','v246629','v246647','v246669','v246676','v246679','v246697','v246705','v246730','v246737','v246738','v246744','v246757','v246764','v246787','v246830','v246859','v246860','v246868','v246875','v246876','v246883','v246958','v246973','v246978','v246983','v247012','v247017','v247088','v247091','v247103','v247109','v247126','v247129','v247151','v247206','v247207','v247216','v247224','v247228','v247236','v247258','v247295','v247311','v247318','v247325','v247328','v247332','v247336','v247364','v247366','v247424','v247427','v247442','v247548','v247553','v247557','v247559','v247563','v247675','v247680','v247684','v247693','v247698','v247716','v247720','v247767','v247772','v247782','v247783','v247790','v247793','v247819','v247823','v247825','v2
47859','v247860','v247868','v247877','v247880','v247885','v247889','v247897','v247923','v247927','v247942','v247944','v248037','v248067','v248069','v248073','v248162','v248182','v248202','v248207','v248266','v248275','v248285','v248290','v513556','v246691','v246712','v246734','v246739','v246745','v246762','v246766','v246772','v246779','v246781','v246784','v246829','v246849','v246858','v246863','v246878','v246885','v246891','v246892','v246899','v246905','v246955','v246974','v246979','v246990','v246995','v247004','v247005','v247007','v247013','v247018','v247020','v247021','v247025','v247028','v247032','v247033','v247040','v247087','v247090','v247096','v247113','v247115','v247117','v247118','v247120','v247125','v247128','v247136','v247139','v247141','v247149','v247152','v247204','v247225','v247229','v247247','v247249','v247308','v247321','v247324','v247331','v247335','v247348','v247407','v247413','v247416','v247425','v247428','v247434','v247493','v247536','v247547','v247552','v247562','v247566','v247649','v247662','v247666','v247667','v247676','v247677','v247694','v247713','v247717','v247721','v247726','v247728','v247729','v247742','v247763','v247770','v247771','v247781','v247786','v247789','v247792','v247806','v247818','v247822','v247861','v247881','v247886','v247899','v247903','v247905','v247908','v247920','v247928','v247930','v247933','v247936','v247946','v248018','v248031','v248036','v248048','v248051','v248054','v248072','v248076','v248081','v248084','v248086','v248166','v248173','v248177','v248178','v248181','v248189','v248211','v248213','v248216','v248217','v248222','v248223','v248276','v248278','v248282','v248292','v248300','v248316','v248322','v248325','v248330','v248331','v245558','v245559','v245561','v245562','v245563','v245564','v245565','v245566','v245567','v245568','v245926','v245927','v245931','v245932','v245933','v245934','v245935','v245936','v245937','v246172','v246173','v246178','v246287','v246378','v246379','v246382','v246383','v246384','v246385','v2
45355','v245360','v245369','v245386','v245404','v245452','v245457','v245458','v245459','v245461','v245462','v245463','v245464','v245465','v245469','v245475','v245479','v245481','v245487','v245493','v245504','v245505','v245508','v245509','v245510','v245511','v245512','v245513','v245529','v245533','v245537','v245540','v245545','v245547','v245549','v245593','v245627','v245628','v245629','v245630','v245631','v245632','v245647','v245660','v245661','v245674','v245683','v245702','v245739','v245770','v245772','v245781','v245782','v245825','v245827','v245828','v245829','v245831','v245833','v245834','v245858','v245860','v245872','v245873','v245874','v245875','v245878','v245882','v245890','v245896','v245902','v245910','v245911','v245925','v245930','v245950','v246032','v246047','v246048','v246081','v246091','v246092','v246093','v246094','v246095','v246096','v246097','v246098','v246099','v246101','v246102','v246141','v246163','v246187','v246193','v246194','v246210','v246248','v246249','v246250','v246251','v246252','v246351','v246354','v246372','v246377','v246403','v246405','v246419','v246420','v246423','v246424','v246425','v246426','v246427','v246428','v246430','v246431','v246432','v246433','v246434','v246435','v246436','v246437','v246438','v246443','v246445','v246446','v246471','v246472','v246473','v246474','v246475','v246477','v246480','v246481','v246611','v246612','v246613','v246614','v246617','v246618','v246632','v246634','v246638','v246639','v246640','v246641','v246642','v246643','v246644','v246645','v246648','v246649','v246652','v246658','v246664','v246726','v246753','v246954','v246985','v247147','v247215','v247401','v248154','v248159','v248198','v248270','v246747','v246851','v246982','v247000','v247305','v247402','v248089','v248192','v248271','v245466','v245467','v245470','v245471','v245472','v245473','v245531','v245536','v245548','v245551','v245574','v245575','v245576','v245577','v245951','v245952','v245953','v245954','v245955','v245956','v245957','v245958','v245961','v2
45962','v246724','v246828','v246848','v246871','v246984','v246993','v247104','v247108','v247209','v247291','v247293','v247310','v247400','v247490','v247544','v247683','v247691','v247869','v248197','v248272','v248293','v246731','v246854','v246872','v246980','v247100','v247399','v247686','v248023','v248153','v248191','v248269','v569633','v569642','v569667','v569671','v569696','v569714','v569722','v569723','v569740','v569742','v569748','v569783','v569793','v569794','v569804','v569805','v569816','v569836','v701409','v701410','v701411','v701413','v701414','v701417','v701418','v701419','v701420','v701421','v701422','v701423','v701424','v701425','v701426','v701427','v701428','v701429','v701430','v701431','v701432','v701433','v701434','v701435','v701436','v701437','v701438','v701439','v701440','v701441','v701442','v701443','v701444','v701445','v701446','v701447','v701448','v701449','v701450','v701451','v701452','v701453','v701454','v701455','v701456','v701457','v701458','v701459','v701460','v701461','v701462','v701463','v701464','v701465','v701466','v701467','v701468','v701469','v701470','v701471','v701472','v701473','v701474','v701475','v701476','v701477','v701478','v701479','v701480','v701481','v701482','v701483','v701484','v701485','v701486','v701487','v701488','v701489','v701490','v701491','v701492','v701493','v701494','v701495','v701496','v701497','v701498','v701499','v701500','v701501','v701502','v701503','v701504','v701505','v701506','v701507','v701508','v701509','v701510','v701511','v701512','v701513','v701514','v701515','v701516','v701517','v701518','v701519','v701520','v701521','v701522','v701523','v701524','v701525','v639923','v639924','v639925','v639926','v639927','v639928','v639929','v639930','v639931','v639932','v639933','v639934','v639935','v639936','v639937','v639938','v639939','v639940','v639941','v639942','v639943','v515408','v515409','v515410','v515412','v515413','v515414','v515415','v515416','v515417','v515418','v515419','v515420','v515421','v515422','v5
15423','v515424','v515425','v515426','v701542','v701543','v701548','v701557','v701558','v701559','v701560','v701561','v701562','v701563','v701564','v701565','v701566','v701567','v701568','v701569','v701570','v701571','v701572','v701573','v701574','v701575','v701576','v701577','v701578','v701579','v701580','v701581','v701582','v701583','v701584','v701585','v701586','v701587','v701588','v701589','v701590','v701591','v701592','v701593','v701594','v701595','v701596','v701597','v701598','v701599','v701600','v701601','v701602','v701603','v701604','v701605','v701606','v701607','v701608','v701609','v701610','v701611','v701612','v701613','v701614','v701615','v701616','v701617','v701618','v701619','v701620','v701621','v701622','v701623','v701624','v701625','v701626','v701627','v701628','v701629','v701630','v701661','v701664','v515427','v515428','v515429','v515430','v515432','v515433','v515434','v515435','v639944','v639945','v639946','v701845','v701846','v701847','v701848','v701849','v701850','v701851','v701852','v701853','v701854','v701855','v701856','v701859','v701860','v639947','v639948','v639949','v639950','v639951','v639952','v639953','v639954','v639955','v639956','v639957','v639958','v639959','v639960','v639961','v639962','v639963');'''
    # Raw string: ``\S`` in a plain literal is an invalid escape sequence.
    # Compiled once here instead of being re-parsed for every row.
    qyer_id_pattern = re.compile(r'http://place.qyer.com/poi/(\S+?)/')
    _count = 0
    # NOTE(review): the SELECT returns commentcount, beentocount, plantocount
    # in that order, but they are unpacked as b_c, p_c, c_c. If those names
    # stand for beento / planto / comment the columns are crossed - confirm
    # against update_sql() before changing anything here.
    for uid, city_id, b_c, p_c, c_c, q_url in MysqlSource(
            poi_ori_config,
            table_or_query=sql,
            size=10000,
            is_table=False,
            is_dict_cursor=False):
        _count += 1
        # Make sure there is a '/' after the id so the pattern can match.
        if not str(q_url).endswith('/'):
            q_url += '/'
        q_url_id = qyer_id_pattern.findall(q_url)[-1]

        bc_d = json.loads(b_c)
        pc_d = json.loads(p_c)
        cc_d = json.loads(c_c)

        res = g_dict.get(q_url_id)
        if res:
            bc, cc, pc, s_id = res
            bc_d['qyer'] = int(bc)
            cc_d['qyer'] = int(cc)
            pc_d['qyer'] = int(pc)
            print(uid, json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d),
                  q_url_id, q_url, s_id)
        else:
            # No crawled entry for this qyer id, so there is no s_id to show.
            # Previously this branch read s_id without binding it, which
            # raised on a first unmatched row and printed a stale value
            # from an earlier row afterwards.
            s_id = None
            print('##' * 10)
            print(uid, json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d),
                  q_url_id, q_url, s_id)
            print('##' * 10)

        logger.info("[count: {}]".format(_count))
        # Written back for every row, whether or not a match was found.
        update_sql((json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d), uid))
Exemplo n.º 19
0
def get_task():
    """Pass every ``hotel_unid`` uid with source 'accor' to ``reset_task``.

    The uids are handed over in batches of 5000, followed by one last call
    for whatever is left. Each uid is submitted exactly once.
    """
    sql = '''SELECT uid FROM hotel_unid WHERE source='accor';'''
    u_l = []
    _count = 0
    for line in MysqlSource(base_data_config,
                            table_or_query=sql,
                            size=10000,
                            is_table=False):
        _count += 1
        u_l.append(line[0])
        if len(u_l) == 5000:
            reset_task(u_l)
            logger.info("[total: {}]".format(_count))
            # Start a new batch. The list used to keep growing, so every
            # later flush (and the final one) resubmitted all earlier uids.
            # Rebinding rather than clearing leaves the submitted list intact.
            u_l = []
    if u_l:
        reset_task(u_l)
Exemplo n.º 20
0
def get_old_info_dict():
    """Load the stored qyer rows of ``poi_merge.attr`` and summarise each field.

    Returns:
        defaultdict: maps a row's ``id`` to ``{field: value}`` with one entry
        per ``(field, strict, numeric)`` triple in ``check_name``. Strict
        fields keep the raw column value. Other fields hold the result of
        ``is_legal()``; for numeric fields that result is reduced further to
        a bool, which is False when it converts to -1 or 0 or cannot be
        converted by ``int()`` at all.
    """
    sql = '''SELECT
  id,
  source,
  name,
  name_en,
  map_info,
  address,
  plantocounts,
  beentocounts,
  ranking,
  grade,
  commentcounts,
  imgurl,
  introduction,
  opentime
FROM poi_merge.attr 
WHERE source='qyer';'''
    old_info = defaultdict(dict)
    rows = MysqlSource(
        poi_ori_config,
        table_or_query=sql,
        size=5000,
        is_table=False,
        is_dict_cursor=True,
    )
    # Stays 0 when the query returns nothing, so the final log still works.
    _count = 0
    for _count, row in enumerate(rows, start=1):
        if not _count % 3000:
            logger.info("[load old data info][count: {}]".format(_count))
        sid = row['id']

        for field, strict, numeric in check_name:
            value = row[field]
            if not strict:
                value = is_legal(value)
                if numeric:
                    # Numeric fields only record whether a usable number
                    # (anything but -1 / 0) is present.
                    try:
                        value = int(value) not in (-1, 0)
                    except Exception:
                        value = False
            old_info[sid][field] = value
    logger.info("[load old data info finished][count: {}]".format(_count))
    return old_info
Exemplo n.º 21
0
def detect():
    """Collect the square images of ``shop_bucket_relation`` into a DataFrame.

    Rows are streamed from the table; ``pic_size`` is parsed into an integer
    ``width`` and ``height``, and only rows where the two are equal are kept.

    Returns:
        pandas.DataFrame: the selected columns plus ``width`` and ``height``,
        one row per square image. Empty when there are none.
    """
    conn = create_engine(spider_data_tmp_str)
    # The LIMIT 0 query fetches no rows; it only gives the result frame the
    # column layout of the table.
    table = pandas.read_sql(sql='''SELECT file_name,
  sid,
  url,
  pic_size,
  bucket_name,
  url_md5,
  pic_md5,
  `use`,
  source,
  status,
  date FROM shop_bucket_relation LIMIT 0;''',
                            con=conn)
    table['width'] = ''
    table['height'] = ''
    sql = '''SELECT file_name,
  sid,
  url,
  pic_size,
  bucket_name,
  url_md5,
  pic_md5,
  `use`,
  source,
  status,
  date
FROM shop_bucket_relation
WHERE source IN ('daodao', 'machine', 'NULL') AND `use` = 1 AND pic_size!='NULL';'''

    # One single-row frame per square image, concatenated once at the end.
    # Appending to ``table`` inside the loop copied the whole frame each time,
    # and DataFrame.append no longer exists in pandas 2.x.
    square_rows = []
    _count = 0
    for line in MysqlSource(db_config=spider_data_tmp_config,
                            table_or_query=sql,
                            size=1024,
                            is_table=False,
                            is_dict_cursor=True):
        _count += 1
        if _count % 1024 == 0:
            print("now: {}".format(_count))
        # NOTE(review): eval() executes whatever text is stored in pic_size.
        # If the column can hold anything but a trusted "(w, h)" literal,
        # switch to ast.literal_eval - confirm the stored format first.
        width, height = eval(line['pic_size'])
        width = int(width)
        height = int(height)
        line['width'] = width
        line['height'] = height
        if width == height:
            square_rows.append(pandas.DataFrame([line]))
    if not square_rows:
        return table
    # Concatenating the per-row frames keeps the index and column order that
    # repeated appends produced.
    return pandas.concat([table] + square_rows)
Exemplo n.º 22
0
def report():
    """Log groups of ``chat_attraction`` ids that point at the same source page.

    Each row's ``url`` column is parsed as JSON. For the 'qyer' and 'daodao'
    entries the source-side id is extracted from the URL, and the row id is
    grouped under ``(source, sid)``. Every group holding more than one row id
    is logged, followed by the number of such groups.
    """
    query_sql = '''SELECT
  id,
  url
FROM chat_attraction;'''

    # (key in the url JSON, pattern whose first group is that source's id).
    # Raw strings: ``\s`` / ``\d`` in a plain literal are invalid escapes.
    # Compiled once here instead of being re-parsed for every row.
    sid_patterns = (
        ('qyer', re.compile(r'place.qyer.com/poi/([\s\S]+)/')),
        ('daodao', re.compile(r'-d(\d+)')),
    )

    union_dict = defaultdict(set)
    _count = 0
    for line in MysqlSource(poi_ori_config,
                            table_or_query=query_sql,
                            size=10000,
                            is_table=False,
                            is_dict_cursor=True):
        _count += 1

        if _count % 10000 == 0:
            logger.debug("[now count: {}]".format(_count))

        _id = line['id']
        _url = line['url']
        if not _url:
            continue
        urls = json.loads(_url)

        for _source, pattern in sid_patterns:
            if _source not in urls:
                continue
            try:
                _sid = pattern.findall(urls[_source])[0]
            except (IndexError, TypeError):
                # Best effort: skip URLs with no id in them (IndexError) and
                # values that are not text (TypeError).
                continue
            union_dict[(_source, _sid)].add(_id)

    # From here on _count counts mergeable groups, not rows.
    _count = 0
    for k, v in union_dict.items():
        if len(v) > 1:
            _count += 1
            logger.info(
                "[ source, sid : {} ][ can be merged uid : {} ]".format(k, v))
    logger.info("[total: {}]".format(_count))
Exemplo n.º 23
0
    def update_per_hotel_validation(self, env='test'):
        """Build validation data for the ``hotel_unid`` rows and store it.

        Rows are read starting at ``self.offset``. Each row goes through
        ``self.get_content``; truthy results are buffered and handed to
        ``self.insert_data`` in groups of 2000, with one more call after the
        last row. ``self.pre_offset`` is increased once per processed row.

        Args:
            env: ``'test'`` reads from ``test_db``, ``'online'`` from
                ``online_db``.

        Raises:
            TypeError: if ``env`` is neither ``'test'`` nor ``'online'``.
            Exception: anything other than ``ReportException`` raised while
                handling a row is logged and re-raised.
        """
        if env not in ('test', 'online'):
            raise TypeError("Unknown Env: {}".format(env))
        db_conf = test_db if env == 'test' else online_db

        sql = '''SELECT
          source,
          sid,
          uid,
          mid,
          name,
          name_en,
          hotel_url
        FROM hotel_unid LIMIT {},999999999999;'''.format(self.offset)
        rows = MysqlSource(
            db_conf,
            table_or_query=sql,
            size=10000,
            is_table=False,
            is_dict_cursor=True,
        )

        pending = []
        for row in rows:
            source = row['source']
            try:
                ret_data = self.get_content(source=source, line=row)
                if ret_data:
                    pending.append(ret_data)
            except ReportException as r_exc:
                # Expected failures are tallied per (message, type) and the
                # row is skipped.
                logger.warning("[report error][msg: {}]".format(str(r_exc)))
                self.report_dict[(str(r_exc), r_exc.type)] += 1
            except Exception as exc:
                logger.exception(
                    msg="[make workload key has exception][source: {}]".format(
                        source),
                    exc_info=exc)
                raise exc

            self.pre_offset += 1
            if len(pending) == 2000:
                # replace into validation data
                self.insert_data(pending)
                pending = []

        # replace into validation data (whatever is left, possibly nothing)
        self.insert_data(pending)
Exemplo n.º 24
0
def get_task():
    """Feed every ``file_name`` of ``error_f_md5_file`` to ``update_sql``.

    Names are taken from the first column of each row and passed on in
    batches of 1000; the running row count is logged before each full
    batch. A final ``update_sql`` call receives whatever is left over
    (possibly an empty list).
    """
    sql = '''SELECT file_name
FROM error_f_md5_file;'''
    rows = MysqlSource(poi_ori_config,
                       table_or_query=sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=False)
    batch = []
    for seen, row in enumerate(rows, 1):
        batch.append(row[0])
        if len(batch) != 1000:
            continue
        logger.info("[count: {}]".format(seen))
        update_sql(batch)
        batch = []
    update_sql(batch)
Exemplo n.º 25
0
def task():
    """Record the images of each POI that are missing from its detect results.

    Rows of ``poi_images`` are streamed ordered by ``sid``. The file names
    belonging to one ``sid`` are collected and compared with the collection
    returned by ``get_poi_pic_detect`` for that ``sid``. Every name that is
    not in that collection is logged and written to ``/tmp/img_res_new`` as
    ``<sid>###<file_name>``, one per line.

    The output file is closed on exit (including on error) and the last
    ``sid`` group is flushed after the loop, so it is no longer dropped.
    """
    query_sql = '''SELECT
  sid,
  file_name,
  bucket_name
FROM poi_images
WHERE source = 'online' AND bucket_name LIKE '%attr%' AND sid LIKE 'v%' AND `use` = 1
ORDER BY sid;'''

    def write_lost_images(poi_id, img_names, out_file):
        # Log and persist every image of poi_id that was not detected.
        # presumably get_poi_pic_detect returns a set of file names -
        # it is used as the right operand of a set difference.
        detected = get_poi_pic_detect(poi_id)
        for img in img_names - detected:
            logger.debug(
                "[img not detected][poi_id: {}][img: {}]".format(
                    poi_id, img))
            out_file.write('{}###{}\n'.format(poi_id, img))

    old_poi_id = None
    img_name_set = set()
    _count = 0

    with open('/tmp/img_res_new', mode='w') as result_f:
        for line in MysqlSource(poi_ori_config,
                                table_or_query=query_sql,
                                size=10000,
                                is_table=False,
                                is_dict_cursor=True):
            _count += 1
            if _count % 3000 == 0:
                logger.debug("[now count: {}]".format(_count))

            # NOTE(review): the SQL WHERE clause already requires both
            # conditions, so this guard looks redundant; kept as written.
            if 'attr' not in line['bucket_name'] and not line[
                    'sid'].startswith('v'):
                continue

            # Get the poi id first.
            poi_id = line['sid']

            # When the id changes, check the images collected for the
            # previous id and start a fresh set for the new one.
            if poi_id != old_poi_id:
                if old_poi_id is not None:
                    write_lost_images(old_poi_id, img_name_set, result_f)
                old_poi_id = poi_id
                img_name_set = set()

            img_name_set.add(line['file_name'])

        # The loop only flushes a group when the next id shows up, so the
        # final group has to be flushed explicitly.
        if old_poi_id is not None:
            write_lost_images(old_poi_id, img_name_set, result_f)
Exemplo n.º 26
0
# Numeric parts of the ``qyer###<n>`` poi ids whose detect tasks are selected
# by get_task(). Kept as data so the SQL text is generated and no id can be
# split by a line break inside the query literal.
_QYER_POI_IDS = (
    558781, 52728, 54661, 558134, 558215, 558233, 558267, 558285, 558305, 558448,
    558491, 558547, 558708, 558745, 558820, 558879, 56415, 52640, 52743, 54680,
    558186, 558217, 558241, 558269, 558287, 558309, 558450, 54688, 558193, 558218,
    558248, 558272, 558289, 558318, 558452, 558502, 558571, 558717, 558765, 558844,
    558915, 57716, 558500, 558565, 558712, 558759, 558841, 558907, 56478, 52736,
    54665, 558182, 558216, 558240, 558268, 558286, 558307, 558449, 558493, 54804,
    558197, 558223, 558251, 558276, 558291, 558420, 558472, 558506, 558576, 558721,
    558770, 558862, 558933, 57721, 54711, 558194, 558222, 558249, 558275, 558290,
    558320, 558462, 558504, 558559, 558709, 558750, 558821, 558881, 56421, 558572,
    558719, 558768, 558858, 558929, 57719, 558875, 485076, 485000, 118786, 123633,
    1329647, 122769, 1414076, 447276, 415327, 86734, 86767, 1207837, 109110, 1207832,
    485285, 203099, 105666, 537021, 1451741, 35050, 72483, 82690, 86904, 98539,
    69988, 72489, 83266, 87010, 99646, 202648, 69966, 72471, 72491, 84214,
    90126, 84188, 88709, 99647, 100230, 106478, 109768, 109849, 109860, 1140829,
    116647, 117620, 1209574, 122317, 122455, 123255, 123767, 125028, 1447115, 164229,
    102176, 106827, 109840, 109852, 113225, 1163474, 116651, 117851, 1209668, 122337,
    122676, 123673, 124124, 1321717, 1454527, 164679, 102167, 106724, 109835, 109850,
    111101, 1140830, 116648, 117733, 1209648, 122332, 122456, 123346, 124095, 125610,
    1451260, 164231, 102169, 106826, 109838, 109851, 111516, 116030, 116650, 117840,
    1209649, 122336, 122675, 123348, 124113, 1321712, 1451997, 164652, 181079, 184915,
    202978, 34984, 38773, 39787, 41399, 42506, 42796, 43168, 45192, 46181,
    48982, 50242, 51537, 52503, 181078, 184896, 202976, 34964, 38074, 39676,
    41387, 42207, 42790, 43147, 45183, 46129, 48966, 50064, 51503, 51876,
    181084, 196701, 204467, 35069, 38884, 40533, 41637, 42519, 42804, 43426,
    45313, 46272, 49064, 51436, 51613, 52523, 181081, 185944, 203542, 35061,
    38843, 40518, 41582, 42509, 42799, 43308, 45307, 46231, 49026, 51186,
    51602, 52518, 53417, 538515, 54045, 55213, 55998, 56407, 59328, 61793,
    61804, 88358, 94929, 94951, 95014, 53479, 538557, 54172, 55218, 56003,
    580621, 59332, 61795, 61806, 88714, 94936, 94952, 95015, 53562, 538719,
    54333, 55233, 56029, 581083, 59346, 61798, 61812, 94904, 94940, 94956,
    95018, 53538, 538599, 54232, 55230, 56008, 580622, 59339, 61796, 61811,
    94855, 94938, 94955, 95017, 123136, 204725, 94916, 123302, 566546, 566263,
    568699, 566315, 1448695, 201860, 59125, 73868, 118188, 86694, 57004, 201651,
    279997, 428505, 429488, 429764, 430995, 431804, 454679, 459803, 48114, 48754,
    501134, 502112, 502038, 545884, 59502, 82246, 82914, 84104, 84342, 84555,
    95023, 98561, 99217, 51086, 54023, 545222, 546012, 61971, 82247, 82915,
    84105, 84491, 84944, 95029, 98569, 99749, 48453, 501010, 501566, 503610,
    53426, 544687, 545800, 581581, 1146305, 429684, 430951, 431567, 454146, 459362,
    47942, 48666, 501019, 501568, 50385, 82203, 82908, 84102, 84340, 84539,
    84956, 98554, 99207, 53513, 544696, 545861, 59501, 82204, 82909, 84103,
    84341, 84553, 86771, 98558, 99208, 430488, 545436, 455435, 546034, 107362,
    108831, 111494, 1144714, 1204856, 1205859, 1206848, 1208031, 122414, 123915, 1448381,
    1448740, 1449249, 1452521, 105743, 107346, 108830, 109732, 1144125, 119723, 1205857,
    1206845, 1208030, 121124, 123496, 1448001, 1448714, 1449231, 1452431, 182976, 204006,
    206337, 207792, 39584, 41566, 452383, 452462, 45601, 48602, 513149, 55168,
    62009, 66090, 66100, 200638, 204108, 206341, 207793, 40316, 41604, 452393,
    452472, 45685, 48612, 513155, 55625, 62143, 66091, 66101, 81521, 81520,
    81528, 84928, 89158, 89171, 89373, 94790, 81529, 84929, 89164, 89172,
    89374, 94803, 107332, 108821, 108909, 1144121, 119661, 1205785, 120660, 1208029,
    1209153, 123487, 1447998, 1448704, 1449207, 1452383, 106682, 108813, 108908, 1139484,
    119658, 1205708, 120658, 1208028, 1209127, 123479, 1446211, 1448532, 1449206, 1451992,
    164736, 203521, 206336, 207791, 35544, 41485, 452375, 452457, 452493, 48549,
    513133, 53688, 61826, 66085, 66099, 81519, 164734, 202749, 206095, 207790,
    34616, 41481, 452370, 452450, 452490, 47367, 513130, 513199, 61825, 66084,
    66097, 66167, 81527, 84926, 89157, 89170, 89372, 89786, 81526, 84925,
    89155, 89169, 89250, 89621, 99414, 103453, 107466, 34482, 1448825, 581276,
    84933, 104804, 108004, 1144553, 1149827, 116590, 117174, 117183, 117187, 117191,
    117210, 1201726, 1201865, 1207499, 1211088, 1211486, 1211888, 106229, 1101051, 1144554,
    1161161, 117170, 117178, 117184, 117188, 117193, 117212, 1201731, 1201871, 1208371,
    1211414, 1211636, 1211993, 106231, 112106, 1145607, 1161162, 117171, 117181, 117185,
    117189, 117195, 119213, 1201796, 1201874, 1209783, 1211427, 1211662, 1212093, 107358,
    1144549, 1145794, 116580, 117173, 117182, 117186, 117190, 117207, 1201604, 1201852,
    1205949, 1210091, 1211473, 1211803, 1212095, 1219810, 123549, 124298, 1253756, 1266027,
    1452553, 1453565, 14605, 14625, 14631, 164303, 164437, 189225, 201127, 202276,
    202901, 121896, 1234685, 1236200, 1252732, 1266011, 1451214, 1452742, 14595, 14615,
    14629, 163988, 164432, 189196, 201125, 202272, 202279, 1234660, 1236156, 1251885,
    1265056, 1450706, 1452582, 1454918, 14614, 14627, 163986, 164391, 175388, 201124,
    202260, 202278, 204859, 122277, 1235915, 124654, 126377, 1446115, 1452560, 1454917,
    14608, 14626, 163984, 164390, 165080, 200660, 202254, 202277, 203338, 204861,
    33734, 34156, 34535, 36456, 37785, 39251, 43218, 43392, 481770, 481809,
    482061, 482319, 482340, 482382, 482846, 204860, 33732, 33751, 34454, 36274,
    37349, 39040, 43041, 43390, 45622, 481793, 482040, 482176, 482336, 482380,
    482563, 205162, 33750, 34332, 35432, 37236, 38229, 39885, 43257, 45506,
    481779, 482019, 482104, 482331, 482376, 482510, 483289, 205161, 33744, 34165,
    35231, 36524, 38200, 39356, 43253, 44190, 481776, 481883, 482074, 482324,
    482366, 482479, 483256, 483520, 497111, 497405, 497466, 497753, 497855, 498042,
    51761, 52428, 52995, 53728, 54096, 54128, 54161, 54636, 54961, 496599,
    497117, 497413, 497557, 497805, 497870, 498270, 51805, 52460, 53009, 54069,
    54101, 54133, 54169, 54892, 54976, 55066, 55731, 55767, 56545, 56704,
    57136, 580659, 59148, 61830, 62225, 73619, 497107, 497197, 497425, 497745,
    497837, 497976, 51744, 52401, 52767, 53047, 54093, 54123, 54153, 54223,
    54956, 55045, 497101, 497164, 497415, 497728, 497810, 497909, 498273, 52336,
    52646, 53038, 54086, 54111, 54138, 54181, 54941, 54999, 55388, 55743,
    55797, 56676, 56720, 57347, 59038, 61827, 61831, 62227, 73620, 55714,
    55755, 56511, 56688, 57083, 57594, 59055, 61829, 61988, 73458, 99574,
    55531, 55750, 55826, 56682, 56725, 57373, 59044, 61828, 61832, 73441,
    85387, 115578, 115662, 50170,
)


def get_task():
    """Pass the ids of the listed POIs' ``pic_detect_task`` rows to ``update_sql``.

    The ``IN`` list is generated from ``_QYER_POI_IDS`` (each value rendered
    as ``'qyer###<n>'``). Previously the list was one hand-written literal in
    which line breaks fell inside five quoted ids, so those ids contained a
    newline and could not match any row.

    All matching task ids are read first; afterwards they are handed to
    ``update_sql`` in batches of 1000, followed by one call with the
    remainder (possibly an empty list).
    """
    poi_id_list = ','.join(
        "'qyer###{}'".format(poi_num) for poi_num in _QYER_POI_IDS)
    sql = '''SELECT id
FROM pic_detect_task
WHERE poi_id IN
      ({});'''.format(poi_id_list)

    # Materialize the ids before updating, as the original code did.
    task_ids = [
        row['id']
        for row in MysqlSource(poi_ori_config, table_or_query=sql,
                               size=10000, is_table=False,
                               is_dict_cursor=True)
    ]

    batch = []
    for task_id in task_ids:
        batch.append(task_id)
        if len(batch) == 1000:
            update_sql(batch)
            batch = []
    update_sql(batch)
Exemplo n.º 27
0
def _delete_already_scanned_file():
    """Delete the file of every ``PoiPictureInformation`` row marked as scanned.

    Rows with ``is_scaned = 1`` are read starting at the module-level
    ``offset``. ``delete_file`` is called for each ``pic_name``; a failure
    is logged with its traceback and the loop continues. ``offset`` is
    incremented after every row and logged whenever it is a multiple of
    10000 (checked before the increment).
    """
    global offset
    sql = '''SELECT pic_name
FROM PoiPictureInformation
WHERE is_scaned = 1
LIMIT {}, 999999999999;'''.format(offset)
    rows = MysqlSource(devdb_config, table_or_query=sql, size=10000,
                       is_table=False, is_dict_cursor=True)
    for row in rows:
        pic_path = row['pic_name']
        try:
            delete_file(f_path=pic_path)
        except Exception as exc:
            # Best effort: report the failure and move on to the next file.
            logger.exception(
                msg="[delete file exception][f_path: {}]".format(pic_path),
                exc_info=exc)

        if not offset % 10000:
            logger.info("[delete file count][offset: {}]".format(offset))
        offset += 1
Exemplo n.º 28
0
def get_task():
    """Refresh the ``qyer`` entry of the three count columns in ``test_result_2``.

    For every row the qyer id is extracted from ``qyer_url`` (a trailing
    ``/`` is appended when missing) and looked up in the mapping returned
    by ``generate_qyer_url_id``. The ``qyer`` key is removed from the
    JSON-decoded ``beentocount``, ``plantocount`` and ``commentcount``
    values; when the lookup succeeds it is set again from the looked-up
    triple, which is unpacked as (been-to, comment, plan-to). The
    re-encoded values and the row id are then passed to ``update_sql``.

    Rows without a lookup result are printed between ``####`` banner lines.

    :raises IndexError: if ``qyer_url`` does not match the expected pattern.
    :raises KeyError: if one of the count objects has no ``qyer`` key.
    """
    g_dict = generate_qyer_url_id()
    sql = '''SELECT
  id,
  beentocount,
  plantocount,
  commentcount,
  qyer_url
FROM test_result_2;'''
    # Raw string: ``\S`` inside a plain literal is an invalid escape
    # sequence. Compiled once because it is used for every row.
    url_id_pattern = re.compile(r'http://place.qyer.com/poi/(\S+?)/')

    _count = 0
    for uid, b_c, p_c, c_c, q_url in MysqlSource(poi_ori_config,
                                                 table_or_query=sql,
                                                 size=10000,
                                                 is_table=False,
                                                 is_dict_cursor=False):
        _count += 1
        if not str(q_url).endswith('/'):
            q_url += '/'
        q_url_id = url_id_pattern.findall(q_url)[-1]

        bc_d = json.loads(b_c)
        del bc_d['qyer']
        pc_d = json.loads(p_c)
        del pc_d['qyer']
        cc_d = json.loads(c_c)
        del cc_d['qyer']

        res = g_dict.get(q_url_id)
        if res:
            bc, cc, pc = res
            bc_d['qyer'] = int(bc)
            cc_d['qyer'] = int(cc)
            pc_d['qyer'] = int(pc)

        # Serialize once; the same strings are printed and stored.
        bc_json = json.dumps(bc_d)
        pc_json = json.dumps(pc_d)
        cc_json = json.dumps(cc_d)

        if not res:
            print('##' * 10)
        print(uid, bc_json, pc_json, cc_json, q_url_id, q_url)
        if not res:
            print('##' * 10)

        logger.info("[count: {}]".format(_count))
        update_sql((bc_json, pc_json, cc_json, uid))
Exemplo n.º 29
0
def hotel_unid_sid_set(source):
    """Collect the ``sid`` values stored in ``hotel_unid`` for one source.

    Progress is logged every 10000 rows and once more with the final count.

    :param source: value interpolated into the ``WHERE source = '...'`` filter.
    :return: set holding the first column (``sid``) of every matching row.
    """
    query_sql = '''SELECT sid
FROM hotel_unid
WHERE source = '{}';'''.format(source)
    progress_fmt = "[prepare unid sid][source: {}][count: {}]"
    rows = MysqlSource(base_data_config,
                       table_or_query=query_sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=False)
    sids = set()
    count = 0  # stays 0 when the query yields no rows
    for count, row in enumerate(rows, 1):
        if not count % 10000:
            logger.info(progress_fmt.format(source, count))
        sids.add(row[0])
    logger.info(progress_fmt.format(source, count))
    return sids
Exemplo n.º 30
0
def hotel_detail_sid_set(source, tag):
    """Collect the ``source_id`` values of table ``detail_hotel_<source>_<tag>``.

    Progress is logged every 10000 rows and once more with the final count.

    :param source: first component of the table name.
    :param tag: second component of the table name.
    :return: set holding the first column (``source_id``) of every row.
    """
    table_name = 'detail_hotel_{}_{}'.format(source, tag)
    query_sql = '''SELECT source_id FROM {};'''.format(table_name)
    progress_fmt = "[prepare detail sid][table_name: {}][count: {}]"
    rows = MysqlSource(service_platform_config,
                       table_or_query=query_sql,
                       size=10000,
                       is_table=False,
                       is_dict_cursor=False)
    source_ids = set()
    count = 0  # stays 0 when the table is empty
    for count, row in enumerate(rows, 1):
        if not count % 10000:
            logger.info(progress_fmt.format(table_name, count))
        source_ids.add(row[0])
    logger.info(progress_fmt.format(table_name, count))
    return source_ids