def get_task():
    """Stream ``ota_location_bak_1215`` and pass the rows to ``update_sql`` in batches.

    Every fetched row is copied into a list and ``encode(<second column>)``
    is inserted at index 1, directly in front of the original ``sid``.
    Batches of 1000 rows are flushed through ``update_sql``; the remainder
    (which may be an empty list) is flushed once the source is exhausted.
    """
    query = (
        "SELECT source, sid, suggest_type, suggest, city_id, country_id, "
        "s_city, s_region, s_country, s_extra, label_batch, others_info "
        "FROM ota_location_bak_1215;"
    )
    batch_size = 1000
    pending = []
    rows = MysqlSource(poi_ori_config, table_or_query=query, size=2000,
                       is_table=False, is_dict_cursor=False)
    for seen, row in enumerate(rows, 1):
        enriched = list(row)
        enriched.insert(1, encode(row[1]))
        pending.append(enriched)
        if len(pending) == batch_size:
            logger.info("[count: {}]".format(seen))
            update_sql(pending)
            pending = []
    # The trailing flush is unconditional, exactly as before.
    update_sql(pending)
def move_img_data(source_table_name):
    """Feed every image row of ``source_table_name`` to ``insert_data``.

    Rows are read with a dict cursor and forwarded in chunks of 1000; a
    debug line with the running row count is written after each full
    chunk.  A non-empty remainder is inserted at the end.

    :param source_table_name: table name interpolated into the SELECT.
    """
    select_stmt = (
        "SELECT file_name, source, sid, url, pic_size, bucket_name, url_md5, "
        "pic_md5, `use`, part, date, info FROM {};"
    ).format(source_table_name)
    chunk = []
    moved = 0
    source_rows = MysqlSource(poi_ori_config, table_or_query=select_stmt,
                              size=10000, is_table=False, is_dict_cursor=True)
    for row in source_rows:
        moved += 1
        chunk.append(row)
        if len(chunk) != 1000:
            continue
        insert_data(chunk)
        chunk = []
        logger.debug("[table_name: {}][move data][count: {}]".format(
            source_table_name, moved))
    if chunk:
        insert_data(chunk)
def move_img_data():
    """Feed every row of ``rest_bucket_relation`` to ``insert_data``.

    Rows are read with a dict cursor and forwarded in chunks of 1000; a
    debug line with the running row count is written after each full
    chunk.  A non-empty remainder is inserted at the end.
    """
    select_stmt = (
        "SELECT file_name, sid, url, pic_size, bucket_name, url_md5, pic_md5, "
        "`use`, source, rank, fixrank, status, date FROM rest_bucket_relation;"
    )
    chunk = []
    moved = 0
    source_rows = MysqlSource(poi_ori_config, table_or_query=select_stmt,
                              size=10000, is_table=False, is_dict_cursor=True)
    for row in source_rows:
        moved += 1
        chunk.append(row)
        if len(chunk) != 1000:
            continue
        insert_data(chunk)
        chunk = []
        logger.debug("[move data][count: {}]".format(moved))
    if chunk:
        insert_data(chunk)
def _get_per_table_task_info(table_name):
    """Build image task tuples for ``table_name`` and hand them to ``insert_task_data``.

    Joins ``BaseDataFinal.poi_images`` with ``table_name`` (starting at the
    module-level ``offset``) and turns each row into
    ``(city_id, cid2grade[city_id], '<source>###<sid>', file_name)``.
    Tuples are flushed 2000 at a time together with the running row count;
    the remainder (possibly empty) is flushed after the loop.  The global
    ``offset`` is advanced by one for every row consumed.

    :param table_name: table joined against ``BaseDataFinal.poi_images``.
    """
    global offset
    # The LIMIT start is fixed when the query is built, before iteration.
    query = (
        "SELECT {0}.city_id AS poi_city_id, {0}.source AS poi_source, sid AS poi_sid, file_name AS pic_name "
        "FROM BaseDataFinal.poi_images JOIN {0} "
        "ON BaseDataFinal.poi_images.source = {0}.source AND BaseDataFinal.poi_images.sid = {0}.source_id "
        "WHERE city_id != 'NULL' and BaseDataFinal.poi_images.`use` != '0' LIMIT {1},999999999;"
    ).format(table_name, offset)
    tasks = []
    handled = 0
    joined_rows = MysqlSource(service_platform_conf, table_or_query=query,
                              size=10000, is_table=False, is_dict_cursor=True)
    for row in joined_rows:
        city = row['poi_city_id']
        grade = cid2grade[city]
        poi_key = '###'.join((row['poi_source'], row['poi_sid']))
        tasks.append((city, grade, poi_key, row['pic_name']))
        handled += 1
        offset += 1
        if len(tasks) == 2000:
            insert_task_data(tasks, handled)
            tasks = []
    insert_task_data(tasks, handled)
def to_data(table_name):
    """Collect first-image md5 tuples from ``detail_hotel_<table_name>`` and store them.

    For every row whose ``others_info`` JSON contains a ``first_img`` value
    accepted by ``is_legal``, a ``(source, source_id, encode(first_img))``
    tuple is queued.  Queued tuples go to ``insert_db`` in groups of 1000,
    and whatever remains (possibly nothing) is written after the loop.
    The global ``offset`` counts every row read, including skipped ones.
    Any exception is logged through ``logger.exception`` and swallowed.

    :param table_name: suffix of the ``detail_hotel_`` table; also passed to ``insert_db``.
    """
    global offset
    select_sql = "SELECT source, source_id, others_info FROM detail_hotel_{0}".format(table_name)
    try:
        queued = []
        records = MysqlSource(db_config=config, table_or_query=select_sql,
                              size=10000, is_table=False, is_dict_cursor=True)
        for record in records:
            offset += 1
            raw_info = record['others_info']
            if not raw_info:
                continue
            info = json.loads(raw_info)
            if 'first_img' not in info:
                continue
            img_url = info['first_img']
            if not is_legal(img_url):
                continue
            digest = encode(img_url)
            queued.append((record['source'], record['source_id'], digest))
            if len(queued) % 1000 == 0:
                insert_db(table_name, queued)
                queued = []
        insert_db(table_name, queued)
    except Exception as exc:
        logger.exception(msg="[入库出现异常]", exc_info=exc)
# _img_ori(_poi_type): dispatch every row of ``table_name`` to
# ``_update_per_uid_img`` through ``pool``.
#
# Builds a query selecting (id, image_list, first_image, official) from
# ``table_name`` ordered by id, with the LIMIT start taken from the global
# ``offset``.  All returned rows are first buffered in ``cache``; each cached
# row is then submitted with ``pool.apply_async`` together with ``_poi_type``.
# Whenever the submission counter reaches a multiple of 1000 the code calls
# ``pool.join()`` and ``update_img()``.  The block also rebinds the global
# ``data`` to an empty list, advances the global ``offset`` by 1000, and
# finishes with further ``update_img()`` / ``pool.join()`` calls.
#
# NOTE(review): ``table_name`` and ``pool`` are not defined in this function;
# presumably module-level names -- confirm.
# NOTE(review): the indentation of this function is lost in this copy, so the
# nesting of the statements after ``if _count % 1000 == 0:`` cannot be
# determined here -- confirm against the canonical file before changing it.
def _img_ori(_poi_type): global data global offset query_sql = '''SELECT id, image_list, first_image, official FROM {} ORDER BY id LIMIT {}, 99999999999999;'''.format(table_name, offset) _count = 0 cache = [] for _uid, _old_img_list, _old_first_img, _official in MysqlSource(poi_ori_config, table_or_query=query_sql, size=500, is_table=False, is_dict_cursor=False): cache.append((_uid, _old_img_list, _old_first_img, _official)) for _uid, _old_img_list, _old_first_img, _official in cache: pool.apply_async(_update_per_uid_img, (_uid, _poi_type, _old_img_list, _old_first_img, _official)) _count += 1 if _count % 1000 == 0: pool.join() update_img() data = [] offset += 1000 update_img() pool.join() update_img()
def get_tasks():
    """Yield the ``uid`` of every ``hotel`` row, ordered by ``uid``."""
    uid_query = "SELECT uid FROM hotel ORDER BY uid;"
    uid_rows = MysqlSource(db_config=spider_data_base_data_config,
                           table_or_query=uid_query, size=10000,
                           is_table=False, is_dict_cursor=False)
    for uid_row in uid_rows:
        # Tuple cursor: the single selected column is at index 0.
        yield uid_row[0]
def get_file_name():
    """Yield ``(source, source_id, pic_md5)`` rows of ``hotel_images``.

    Only rows with ``part = '20171127a'`` and a NULL ``info`` are selected.
    """
    pending_query = (
        "SELECT source, source_id, pic_md5 FROM hotel_images "
        "WHERE part = '20171127a' AND info IS NULL;"
    )
    image_rows = MysqlSource(poi_ori_config, table_or_query=pending_query,
                             size=10000, is_table=False, is_dict_cursor=False)
    for image_row in image_rows:
        yield image_row
def main():
    """Run ``detect_table`` on every table listed for the ``tmp`` schema."""
    tables_query = "SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = 'tmp';"
    table_rows = MysqlSource(db_config=spider_task_data_config,
                             table_or_query=tables_query, size=10000,
                             is_table=False, is_dict_cursor=False)
    for table_row in table_rows:
        name = table_row[0]
        logger.info("[start][table_name: {}]".format(name))
        detect_table(table_name=name)
def get_tasks():
    """Yield the ``daodao`` rows of ``ota_location`` for a fixed list of city ids.

    Rows are produced as dicts (dict cursor).
    """
    city_query = (
        "SELECT * FROM ota_location WHERE source = 'daodao' AND city_id in "
        "('11444','60177','12344','60178','10436','60179','60180','30118','30140',"
        "'50053','60181','10648','11424','60182','60183','50117','20096');"
    )
    location_rows = MysqlSource(db_config=source_info_config,
                                table_or_query=city_query, size=10000,
                                is_table=False, is_dict_cursor=True)
    for location in location_rows:
        yield location
def get_tasks():
    """Yield up to ten ``qyer`` rows of ``ota_location`` carrying a ``from``/``form`` key.

    A row qualifies when ``others_info`` has a non-NULL ``$.from`` or
    ``$.form`` JSON member.  Rows are produced as dicts (dict cursor).
    """
    sample_query = (
        "SELECT * FROM ota_location WHERE source = 'qyer' AND "
        "(json_extract(others_info, '$.from') IS NOT NULL OR "
        "json_extract(others_info, '$.form') IS NOT NULL) limit 10;"
    )
    location_rows = MysqlSource(db_config=source_info_config,
                                table_or_query=sample_query, size=10000,
                                is_table=False, is_dict_cursor=True)
    for location in location_rows:
        yield location
def get_task():
    """Pass every ``mioji_id`` of ``filter_data_already_online`` to ``update_sql``.

    Ids are forwarded in lists of 2000; the remainder (possibly an empty
    list) is forwarded after the source is exhausted.
    """
    id_query = "SELECT mioji_id FROM filter_data_already_online;"
    group_size = 2000
    ids = []
    id_rows = MysqlSource(poi_ori_config, table_or_query=id_query, size=10000,
                          is_table=False, is_dict_cursor=True)
    for id_row in id_rows:
        ids.append(id_row['mioji_id'])
        if len(ids) == group_size:
            update_sql(ids)
            ids = []
    # The trailing flush is unconditional, exactly as before.
    update_sql(ids)
def get_tasks(city_id=None, config=None):
    """Yield the ``daodao`` rows of ``ota_location`` for the given city ids.

    :param city_id: iterable of city ids.  ``None`` or an empty iterable
        yields nothing (previously these produced a ``TypeError`` and the
        invalid SQL ``in ()`` respectively).
    :param config: database configuration forwarded to ``MysqlSource``.
    :return: generator of row dicts (dict cursor).
    """
    city_ids = tuple(city_id) if city_id is not None else ()
    if not city_ids:
        return
    # Formatting the tuple itself rendered a single id as "('x',)", which is
    # not valid SQL.  Joining the element reprs gives the same text as before
    # for two or more ids and a well-formed "('x')" for exactly one.
    # NOTE(review): the ids are still interpolated into the statement, so
    # callers must only pass trusted values.
    in_clause = '({})'.format(', '.join(repr(cid) for cid in city_ids))
    query_sql = '''SELECT * FROM ota_location WHERE source = 'daodao' AND city_id in {0};'''.format(in_clause)
    for _l in MysqlSource(db_config=config, table_or_query=query_sql,
                          size=10000, is_table=False, is_dict_cursor=True):
        yield _l
def get_task():
    """Call ``get_file`` for every ``qyer`` sid that has more than 90 in-use images.

    The sids come from ``poi_images`` grouped by ``sid`` with
    ``source = 'qyer'`` and ``use = 1``.
    """
    crowded_query = (
        "SELECT sid FROM poi_images WHERE source = 'qyer' AND `use` = 1 "
        "GROUP BY sid HAVING count(*) > 90;"
    )
    sid_rows = MysqlSource(poi_ori_config, table_or_query=crowded_query,
                           size=10000, is_table=False, is_dict_cursor=True)
    for sid_row in sid_rows:
        get_file(sid_row['sid'])
def detect_table(table_name):
    """Find rows of ``table_name`` that lie too far from their assigned city.

    For every row with a usable ``city_id`` the distance between the city's
    map info (from ``get_c_info()``) and the row's ``map_info`` is computed
    with ``get_distance``.  Rows whose distance is greater than 50 are
    collected and passed to ``get_sql`` in groups of 200, which writes into
    the two SQL files created under ``SQL_PATH``.  After both files are
    closed, ``update_table`` is called with their names.

    Rows are skipped when the city is unknown to ``get_c_info()`` or when
    ``get_distance`` returns -1.

    :param table_name: table to scan; also used in the output file names.
    """
    c_dict = get_c_info()
    _sql = '''SELECT map_info, city_id, source, source_id FROM {} WHERE city_id != 'NULL' AND city_id IS NOT NULL;'''.format(table_name)
    offset = 0
    error = 0
    new_data = []
    update_sql_name = 'no_cid_hotel|_|{}.sql'.format(table_name)
    del_sql_name = 'no_cid_hotel_del|_|{}.sql'.format(table_name)
    # Context managers guarantee both files are closed even when the scan
    # raises; previously they leaked on any exception.
    with open(os.path.join(SQL_PATH, update_sql_name), 'w') as f_res, \
            open(os.path.join(SQL_PATH, del_sql_name), 'w') as f_del:
        for line in MysqlSource(db_config=spider_task_data_config, table_or_query=_sql,
                                size=10000, is_table=False, is_dict_cursor=True):
            offset += 1
            _map_info = line['map_info']
            _city_id = line['city_id']
            _source = line['source']
            _source_id = line['source_id']
            _c_map_info = c_dict.get(_city_id)
            if not _c_map_info:
                continue
            dist = get_distance(_c_map_info, _map_info)
            if dist == -1:
                continue
            # Reuse the distance computed above instead of calling
            # get_distance a second time (assumes it is deterministic).
            if dist > 50:
                error += 1
                new_data.append((_source, _source_id, _city_id))
                if len(new_data) == 200:
                    get_sql(table_name=table_name, res_f=f_res, res_del_f=f_del, data=new_data)
                    new_data = []
                logger.info(
                    "[error_distance][offset: {}][error: {}][dist: {}][source: {}][source_id: {}][city_id: {}]".format(
                        offset, error, dist, _source, _source_id, _city_id))
        if new_data:
            get_sql(table_name=table_name, res_f=f_res, res_del_f=f_del, data=new_data)
    update_table(u_sql_name=update_sql_name, d_sql_name=del_sql_name)
def get_tasks():
    """Yield ``poi`` image tasks for ``rest`` buckets that have no ``info`` yet.

    Reads ``poi_images`` ordered by ``(source, sid)`` starting at the global
    ``offset`` and increments the global ``pre_offset`` for every row read.
    A row produces ``(source, file_name, pic_md5, 'mioji-rest', 'poi')`` only
    when its ``info`` is empty, its bucket name does not contain ``attr``
    and it does contain ``rest``; every other row is skipped.
    """
    global offset
    global pre_offset
    image_query = (
        "SELECT source, file_name, pic_md5, bucket_name, info FROM poi_images "
        "ORDER BY source, sid LIMIT {}, 999999999999999;"
    ).format(offset)
    image_rows = MysqlSource(db_config=base_data_final_config,
                             table_or_query=image_query, size=10000,
                             is_table=False, is_dict_cursor=False)
    for source, file_name, file_md5, bucket, info in image_rows:
        pre_offset += 1
        if info:
            continue
        # 'attr' is tested first, as before; attr and shop buckets (and any
        # unrecognised bucket) are currently skipped.
        if 'attr' in bucket or 'rest' not in bucket:
            continue
        bucket_name = 'mioji-rest'
        yield source, file_name, file_md5, bucket_name, 'poi'
def get_tasks():
    """Yield ``hotel`` uids in ``uid`` order, starting at the global ``offset``.

    The global ``pre_offset`` is incremented once per row before the uid is
    yielded.
    """
    global offset
    global pre_offset
    uid_query = "SELECT uid FROM hotel ORDER BY uid LIMIT {}, 999999999999999;".format(offset)
    uid_rows = MysqlSource(db_config=spider_data_base_data_config,
                           table_or_query=uid_query, size=10000,
                           is_table=False, is_dict_cursor=False)
    for uid_row in uid_rows:
        pre_offset += 1
        yield uid_row[0]
def get_task():
    """Refresh the ``qyer`` counters of a fixed list of ``chat_attraction`` rows.

    For every selected row the qyer url id is extracted from the ``$.qyer``
    member of ``url`` and looked up in ``generate_qyer_url_id()``.  On a hit
    the ``qyer`` entry of the three JSON counter columns is overwritten with
    the looked-up values; on a miss the counters are left as read and the
    row is printed between ``##`` banners.  In both cases the three
    (re-serialised) counters and the row id are passed to ``update_sql``.

    Rows whose qyer url is NULL or does not match the expected
    ``http://place.qyer.com/poi/<id>/`` shape are skipped with a warning
    instead of aborting the whole run.

    NOTE(review): the selected columns are (commentcount, beentocount,
    plantocount) but they are unpacked as ``b_c, p_c, c_c`` and the lookup
    result as ``bc, cc, pc`` -- verify the pairing against
    ``generate_qyer_url_id`` and ``update_sql``.
    """
    g_dict = generate_qyer_url_id()
    # The id list is broken after commas only; SQL ignores that whitespace.
    sql = '''SELECT id, city_id, commentcount, beentocount, plantocount, json_extract(url, '$.qyer') FROM chat_attraction WHERE id IN (
'v219493','v219498','v219500','v219745','v219897','v219918','v219932','v220018','v220094','v220160','v220315','v220329','v220403','v220406','v220514','v220516','v220519','v220542','v220543','v220545','v220639','v220657','v220760','v220775','v220776','v220800','v220802','v220805','v220833','v220834','v220836','v220837','v220838','v220943','v220972','v220999','v221104','v221118','v221122','v221123','v221124','v221401','v221407','v221411','v221414','v221415','v221416','v221417','v221419','v221420','v221652','v221848','v221874','v221938','v221939','v222126','v222129','v222141','v222147','v222160','v222168','v222272','v222402','v222490','v222497','v222521','v222538','v222542','v222545','v222554','v222592','v222593','v222784','v222828','v223852','v223875','v223896','v223901','v223946','v223957','v223976','v224021','v224040','v224057','v224072','v224087','v224100','v224105','v224120','v224144','v224145','v224173','v224225','v224227','v224241','v224252','v224273','v224287','v224327','v224332','v224339','v224344','v224456','v224482','v224507','v224516','v224529','v224542','v224555','v224595','v224596','v224598','v224629','v224662','v224679','v224684','v224693','v224694','v224715','v224723','v224724','v224726','v224736','v224738','v224767','v224785','v224825','v224830','v224844','v224860','v224871','v224880','v224904','v224920','v224934','v224945','v224946','v224962','v224971','v224985','v225005','v225026','v225028','v225046','v225059','v225069','v225073','v225077','v225082','v225112','v225178','v225181','v225184','v225202','v225227','v225267','v225290','v225299','v225316','v225348','v225354','v225366','v225390','v225396','v225419','v225428','v225451','v225453','v225504','v225507','v225510','v225524','v225580','v225592','v225595','v225620','v225640','v225644','v225646','v225649','v225673','v225692',
'v225701','v225709','v225717','v225734','v225739','v225753','v225758','v225831','v225832','v225833','v225860','v225863','v225872','v225908','v225925','v225932','v225951','v225977','v225981','v225988','v225997','v226022','v226028','v226049','v226054','v226105','v226124','v226139','v226219','v226228','v226250','v226311','v226322','v226327','v226371','v226387','v226392','v226435','v226470','v226475','v226493','v226530','v226544','v226553','v226556','v226568','v226572','v226577','v226584','v226637','v226644','v226653','v226692','v226721','v226732','v226734','v226737','v226809','v226813','v226828','v226833','v226883','v226886','v226892','v226907','v226926','v226943','v226974','v226975','v226986','v226998','v227046','v227063','v227071','v227083','v227087','v227100','v227101','v227127','v227134','v227135','v227149','v227158','v227190','v227221','v227278','v227303','v227311','v227312','v227330','v227347','v227349','v227350','v227376','v227380','v227384','v227387','v227414','v227418','v227518','v227543','v227576','v227579','v227633','v227647','v227718','v227765','v227782','v227811','v227816','v227830','v227836','v227849','v227866','v227877','v227893','v227903','v227919','v227925','v227953','v227981','v228002','v228029','v228071','v228086','v228097','v228107','v228123','v228141','v228151','v228182','v228186','v228247','v228257','v228258','v228260','v228287','v228307','v228314','v228319','v228336','v228366','v228368','v228407','v228416','v228429','v228430','v228441','v228450','v228489','v228505','v228506','v228535','v228575','v228582','v228639','v228640','v228658','v228671','v228688','v228706','v228707','v228709','v228733','v228770','v228773','v228793','v228796','v228804','v228817','v228824','v228839','v228885','v228897','v228925','v228927','v228958','v228971','v228997','v229023','v229059','v229068','v229073','v229087','v229115','v229126','v229133','v229145','v229185','v229190','v229194','v229204','v229206','v229226','v229261','v229267','v229275','v229278','v229291','v229292',
'v229392','v229401','v229404','v229413','v229429','v229432','v229476','v229496','v229531','v229547','v229555','v229620','v229625','v229652','v229656','v229662','v229681','v229703','v229721','v229734','v229766','v229769','v229818','v229820','v229889','v229897','v229905','v229922','v229932','v229954','v229975','v229996','v230020','v230063','v230064','v230111','v230113','v230124','v230150','v230156','v230178','v230182','v230187','v230227','v230231','v230287','v230297','v230298','v230317','v230384','v230397','v230412','v230423','v248961','v249003','v246711','v246736','v246741','v246748','v246769','v246770','v246775','v246783','v246786','v246788','v246789','v246790','v246791','v246793','v246794','v246795','v246797','v246852','v246856','v246861','v246867','v246877','v246887','v246888','v246893','v246894','v246897','v246900','v246906','v246910','v246912','v246913','v246914','v246915','v246916','v246917','v246918','v246919','v246972','v246977','v246996','v247002','v247003','v247010','v247011','v247016','v247019','v247024','v247030','v247036','v247038','v247039','v247089','v247092','v247110','v247119','v247121','v247122','v247127','v247130','v247132','v247137','v247142','v247144','v247153','v247201','v247219','v247223','v247227','v247235','v247242','v247246','v247248','v247255','v247296','v247307','v247319','v247322','v247329','v247333','v247345','v247358','v247362','v247363','v247365','v247368','v247405','v247406','v247409','v247412','v247415','v247419','v247421','v247426','v247432','v247433','v247436','v247437','v247438','v247439','v247440','v247441','v247443','v247444','v247445','v247446','v247447','v247449','v247450','v247452','v247453','v247455','v247457','v247537','v247539','v247542','v247549','v247554','v247558','v247560','v247564','v247567','v247568','v247569','v247570','v247571','v247572','v247573','v247574','v247576','v247577','v247578','v247579','v247581','v247582','v247583','v247584','v247585','v247586','v247648','v247661','v247664','v247665','v247670','v247672',
'v247697','v247702','v247715','v247719','v247723','v247730','v247741','v247765','v247768','v247779','v247784','v247794','v247798','v247799','v247804','v247820','v247826','v247837','v247863','v247871','v247876','v247883','v247884','v247896','v247902','v247904','v247907','v247910','v247911','v247912','v247913','v247914','v247915','v247916','v247917','v247918','v247919','v247922','v247926','v247932','v247935','v247939','v248019','v248027','v248029','v248032','v248034','v248038','v248042','v248049','v248052','v248055','v248058','v248059','v248060','v248061','v248062','v248063','v248064','v248065','v248066','v248070','v248074','v248077','v248080','v248082','v248169','v248176','v248185','v248188','v248200','v248206','v248208','v248209','v248210','v248215','v248220','v248221','v248280','v248286','v248288','v248289','v248291','v248295','v248302','v248303','v248304','v248306','v248307','v248308','v248309','v248310','v248311','v248312','v248313','v248318','v248319','v248323','v248327','v248332','v248333','v246701','v246729','v246735','v246742','v246756','v246759','v246765','v246767','v246768','v246773','v246774','v246776','v246785','v246792','v246862','v246865','v246881','v246884','v246890','v246895','v246898','v246901','v246904','v246908','v246981','v247001','v247006','v247008','v247009','v247014','v247022','v247023','v247026','v247029','v247034','v247037','v247043','v247083','v247085','v247086','v247098','v247106','v247112','v247116','v247123','v247124','v247133','v247135','v247138','v247140','v247143','v247145','v247146','v247199','v247203','v247220','v247222','v247226','v247230','v247237','v247245','v247297','v247320','v247327','v247330','v247334','v247361','v247404','v247408','v247411','v247414','v247417','v247418','v247420','v247429','v247430','v247492','v247535','v247540','v247545','v247556','v247565','v247647','v247650','v247663','v247668','v247669','v247678','v247681','v247688','v247695','v247700','v247714','v247718','v247724','v247725','v247727','v247769','v247774',
'v247776','v247785','v247788','v247791','v247800','v247801','v247805','v247817','v247821','v247865','v247875','v247878','v247887','v247892','v247894','v247901','v247906','v247909','v247921','v247925','v247929','v247931','v247934','v247937','v247938','v247941','v248015','v248025','v248030','v248035','v248039','v248047','v248050','v248053','v248056','v248071','v248075','v248078','v248083','v248085','v248090','v248101','v248161','v248163','v248171','v248174','v248179','v248199','v248205','v248212','v248214','v248218','v248219','v248224','v248267','v248296','v248297','v248299','v248315','v248320','v248321','v248324','v248328','v248329','v248334','v223858','v223860','v223872','v223898','v223924','v223950','v223952','v223954','v223992','v224023','v224083','v224217','v224223','v224247','v224253','v224319','v224331','v224522','v224531','v224562','v224650','v224712','v224817','v224833','v224881','v224947','v224970','v224973','v224976','v224979','v225016','v225024','v225044','v225139','v225165','v225187','v225189','v225361','v225449','v225486','v225513','v225659','v225715','v225747','v225784','v225837','v225904','v225913','v226014','v226024','v226087','v226144','v226145','v226169','v226268','v226308','v226310','v226312','v226332','v226342','v226376','v226408','v226409','v226414','v226575','v226643','v226695','v226719','v226728','v226853','v226901','v227128','v227184','v227226','v227254','v227356','v227398','v227400','v227525','v227593','v227625','v227671','v227685','v227761','v227801','v227824','v227827','v227861','v227891','v227904','v227945','v228103','v228104','v228128','v228139','v228172','v228179','v228282','v228291','v228329','v228661','v228673','v228679','v228692','v228704','v228716','v228761','v228800','v228805','v228972','v229025','v229081','v229109','v229151','v229244','v229256','v229319','v229348','v229372','v229433','v229438','v229449','v229454','v229477','v229552','v229594','v229636','v229697','v229715','v229761','v229924','v229964','v229981','v230012','v230026',
'v230084','v230185','v230278','v230318','v230338','v230348','v230409','v231507','v231661','v231833','v232874','v233082','v233224','v233709','v233808','v234353','v235111','v235325','v236475','v237091','v237353','v237862','v237890','v238094','v238237','v239713','v239846','v241035','v241985','v242366','v242409','v242717','v242804','v243215','v243475','v243548','v243772','v246698','v246723','v246740','v246743','v246751','v246760','v246763','v246777','v246853','v246857','v246866','v246880','v246907','v246971','v246976','v246997','v247015','v247082','v247084','v247093','v247095','v247111','v247131','v247134','v247200','v247202','v247214','v247238','v247241','v247243','v247298','v247301','v247323','v247326','v247422','v247431','v247448','v247538','v247550','v247555','v247671','v247674','v247682','v247689','v247696','v247701','v247766','v247773','v247775','v247780','v247787','v247796','v247862','v247870','v247879','v247882','v247893','v247895','v247940','v248016','v248028','v248043','v248088','v248155','v248167','v248172','v248175','v248277','v248281','v248287','v246728','v246873','v246987','v247102','v247154','v247218','v247306','v247692','v248157','v248165','v248196','v245354','v245370','v245387','v245437','v245448','v245498','v245541','v245550','v245560','v245572','v245591','v245595','v245598','v245643','v245645','v245646','v245649','v245659','v245664','v245667','v245676','v245678','v245679','v245682','v245684','v245685','v245694','v245696','v245697','v245700','v245701','v245712','v245718','v245728','v245730','v245731','v245733','v245771','v245774','v245777','v245793','v245808','v245835','v245836','v245854','v245855','v245859','v245884','v245887','v245889','v245905','v245906','v245908','v245912','v245914','v245920','v245921','v245922','v245924','v245941','v245944','v245946','v245947','v245969','v245990','v246035','v246059','v246066','v246070','v246071','v246072','v246073','v246074','v246075','v246085','v246087','v246088','v246089','v246090','v246104','v246105','v246108',
'v246109','v246110','v246112','v246122','v246125','v246128','v246130','v246134','v246138','v246142','v246143','v246149','v246167','v246184','v246188','v246211','v246212','v246213','v246214','v246215','v246216','v246219','v246222','v246264','v246280','v246281','v246304','v246312','v246314','v246322','v246327','v246334','v246337','v246404','v246408','v246412','v246413','v246455','v246456','v246457','v246458','v246459','v246460','v246463','v246482','v246503','v246505','v246506','v246509','v246511','v246513','v246516','v246526','v246546','v246547','v246570','v246577','v246579','v246582','v246619','v246620','v246622','v246624','v246626','v246628','v246631','v246633','v246635','v246637','v246646','v246650','v246651','v246653','v246655','v246656','v246657','v246673','v246675','v246718','v246727','v246758','v246761','v246778','v246780','v246855','v246870','v246879','v246882','v246956','v246988','v246991','v246999','v247099','v247105','v247150','v247205','v247213','v247234','v247292','v247349','v247491','v247543','v247864','v247890','v247898','v247947','v248017','v248020','v248041','v248046','v248158','v248183','v248195','v248203','v248279','v248284','v245364','v245373','v245449','v245450','v245453','v245455','v245456','v245476','v245477','v245482','v245483','v245484','v245485','v245486','v245488','v245489','v245491','v245492','v245494','v245495','v245496','v245497','v245515','v245516','v245517','v245519','v245520','v245521','v245522','v245527','v245528','v245539','v245542','v245544','v245546','v245552','v245554','v245569','v245571','v245592','v245633','v245634','v245635','v245636','v245637','v245638','v245639','v245640','v245641','v245642','v245681','v245686','v245687','v245688','v245689','v245690','v245691','v245692','v245693','v245703','v245704','v245705','v245706','v245707','v245708','v245709','v245710','v245711','v245713','v245714','v245717','v245727','v245749','v245810','v245812','v245813','v245814','v245815','v245818','v245837','v245838','v245843','v245845','v245846',
'v245847','v245848','v245849','v245850','v245851','v245852','v245891','v245892','v245893','v245894','v245895','v245897','v245898','v245903','v245904','v245928','v245959','v245963','v245964','v245965','v245966','v245967','v245968','v245971','v246019','v246020','v246021','v246023','v246024','v246025','v246026','v246027','v246031','v246033','v246037','v246064','v246068','v246069','v246076','v246077','v246078','v246079','v246080','v246082','v246083','v246084','v246106','v246107','v246111','v246114','v246118','v246124','v246126','v246127','v246129','v246132','v246133','v246136','v246158','v246161','v246162','v246186','v246190','v246192','v246199','v246208','v246217','v246221','v246223','v246259','v246263','v246292','v246293','v246295','v246296','v246297','v246298','v246299','v246300','v246309','v246315','v246319','v246320','v246324','v246331','v246333','v246339','v246402','v246411','v246415','v246416','v246417','v246418','v246422','v246444','v246447','v246448','v246449','v246450','v246451','v246452','v246453','v246454','v246461','v246462','v246483','v246486','v246510','v246512','v246514','v246517','v246520','v246521','v246523','v246525','v246527','v246528','v246529','v246531','v246542','v246544','v246550','v246551','v246553','v246571','v246573','v246581','v246630','v246659','v246661','v246663','v246665','v246667','v245353','v245357','v245358','v245362','v245363','v245366','v245368','v245371','v245372','v245376','v245388','v245389','v245401','v245430','v245432','v245433','v245435','v245436','v245474','v245480','v245499','v245500','v245501','v245502','v245503','v245514','v245523','v245525','v245526','v245530','v245532','v245534','v245535','v245538','v245553','v245555','v245557','v245570','v245590','v245594','v245596','v245597','v245600','v245644','v245648','v245650','v245665','v245666','v245668','v245669','v245670','v245671','v245672','v245675','v245677','v245699','v245715','v245716','v245719','v245721','v245722','v245723','v245724','v245725','v245726','v245729','v245732',
'v245738','v245776','v245778','v245780','v245794','v245795','v245796','v245819','v245820','v245821','v245822','v245823','v245824','v245839','v245840','v245841','v245842','v245844','v245853','v245856','v245857','v245862','v245863','v245880','v245881','v245883','v245885','v245888','v245899','v245900','v245901','v245907','v245909','v245917','v245918','v245919','v245923','v245942','v245948','v245970','v245972','v245989','v246029','v246036','v246038','v246051','v246052','v246053','v246054','v246055','v246056','v246057','v246058','v246060','v246061','v246063','v246065','v246067','v246086','v246100','v246103','v246113','v246115','v246117','v246120','v246121','v246131','v246135','v246137','v246139','v246144','v246159','v246164','v246165','v246185','v246189','v246197','v246198','v246209','v246220','v246261','v246262','v246266','v246267','v246285','v246286','v246290','v246301','v246302','v246305','v246306','v246308','v246313','v246316','v246317','v246325','v246326','v246328','v246329','v246330','v246332','v246335','v246336','v246338','v246352','v246353','v246355','v246406','v246407','v246409','v246410','v246439','v246440','v246441','v246442','v246464','v246466','v246467','v246468','v246469','v246484','v246487','v246488','v246489','v246490','v246491','v246492','v246493','v246495','v246498','v246499','v246500','v246501','v246502','v246504','v246508','v246522','v246524','v246530','v246543','v246548','v246585','v246623','v246629','v246647','v246669','v246676','v246679','v246697','v246705','v246730','v246737','v246738','v246744','v246757','v246764','v246787','v246830','v246859','v246860','v246868','v246875','v246876','v246883','v246958','v246973','v246978','v246983','v247012','v247017','v247088','v247091','v247103','v247109','v247126','v247129','v247151','v247206','v247207','v247216','v247224','v247228','v247236','v247258','v247295','v247311','v247318','v247325','v247328','v247332','v247336','v247364','v247366','v247424','v247427','v247442','v247548','v247553','v247557','v247559',
'v247563','v247675','v247680','v247684','v247693','v247698','v247716','v247720','v247767','v247772','v247782','v247783','v247790','v247793','v247819','v247823','v247825','v247859','v247860','v247868','v247877','v247880','v247885','v247889','v247897','v247923','v247927','v247942','v247944','v248037','v248067','v248069','v248073','v248162','v248182','v248202','v248207','v248266','v248275','v248285','v248290','v513556','v246691','v246712','v246734','v246739','v246745','v246762','v246766','v246772','v246779','v246781','v246784','v246829','v246849','v246858','v246863','v246878','v246885','v246891','v246892','v246899','v246905','v246955','v246974','v246979','v246990','v246995','v247004','v247005','v247007','v247013','v247018','v247020','v247021','v247025','v247028','v247032','v247033','v247040','v247087','v247090','v247096','v247113','v247115','v247117','v247118','v247120','v247125','v247128','v247136','v247139','v247141','v247149','v247152','v247204','v247225','v247229','v247247','v247249','v247308','v247321','v247324','v247331','v247335','v247348','v247407','v247413','v247416','v247425','v247428','v247434','v247493','v247536','v247547','v247552','v247562','v247566','v247649','v247662','v247666','v247667','v247676','v247677','v247694','v247713','v247717','v247721','v247726','v247728','v247729','v247742','v247763','v247770','v247771','v247781','v247786','v247789','v247792','v247806','v247818','v247822','v247861','v247881','v247886','v247899','v247903','v247905','v247908','v247920','v247928','v247930','v247933','v247936','v247946','v248018','v248031','v248036','v248048','v248051','v248054','v248072','v248076','v248081','v248084','v248086','v248166','v248173','v248177','v248178','v248181','v248189','v248211','v248213','v248216','v248217','v248222','v248223','v248276','v248278','v248282','v248292','v248300','v248316','v248322','v248325','v248330','v248331','v245558','v245559','v245561','v245562','v245563','v245564','v245565','v245566','v245567','v245568','v245926','v245927',
'v245931','v245932','v245933','v245934','v245935','v245936','v245937','v246172','v246173','v246178','v246287','v246378','v246379','v246382','v246383','v246384','v246385','v245355','v245360','v245369','v245386','v245404','v245452','v245457','v245458','v245459','v245461','v245462','v245463','v245464','v245465','v245469','v245475','v245479','v245481','v245487','v245493','v245504','v245505','v245508','v245509','v245510','v245511','v245512','v245513','v245529','v245533','v245537','v245540','v245545','v245547','v245549','v245593','v245627','v245628','v245629','v245630','v245631','v245632','v245647','v245660','v245661','v245674','v245683','v245702','v245739','v245770','v245772','v245781','v245782','v245825','v245827','v245828','v245829','v245831','v245833','v245834','v245858','v245860','v245872','v245873','v245874','v245875','v245878','v245882','v245890','v245896','v245902','v245910','v245911','v245925','v245930','v245950','v246032','v246047','v246048','v246081','v246091','v246092','v246093','v246094','v246095','v246096','v246097','v246098','v246099','v246101','v246102','v246141','v246163','v246187','v246193','v246194','v246210','v246248','v246249','v246250','v246251','v246252','v246351','v246354','v246372','v246377','v246403','v246405','v246419','v246420','v246423','v246424','v246425','v246426','v246427','v246428','v246430','v246431','v246432','v246433','v246434','v246435','v246436','v246437','v246438','v246443','v246445','v246446','v246471','v246472','v246473','v246474','v246475','v246477','v246480','v246481','v246611','v246612','v246613','v246614','v246617','v246618','v246632','v246634','v246638','v246639','v246640','v246641','v246642','v246643','v246644','v246645','v246648','v246649','v246652','v246658','v246664','v246726','v246753','v246954','v246985','v247147','v247215','v247401','v248154','v248159','v248198','v248270','v246747','v246851','v246982','v247000','v247305','v247402','v248089','v248192','v248271','v245466','v245467','v245470','v245471','v245472','v245473',
'v245531','v245536','v245548','v245551','v245574','v245575','v245576','v245577','v245951','v245952','v245953','v245954','v245955','v245956','v245957','v245958','v245961','v245962','v246724','v246828','v246848','v246871','v246984','v246993','v247104','v247108','v247209','v247291','v247293','v247310','v247400','v247490','v247544','v247683','v247691','v247869','v248197','v248272','v248293','v246731','v246854','v246872','v246980','v247100','v247399','v247686','v248023','v248153','v248191','v248269','v569633','v569642','v569667','v569671','v569696','v569714','v569722','v569723','v569740','v569742','v569748','v569783','v569793','v569794','v569804','v569805','v569816','v569836','v701409','v701410','v701411','v701413','v701414','v701417','v701418','v701419','v701420','v701421','v701422','v701423','v701424','v701425','v701426','v701427','v701428','v701429','v701430','v701431','v701432','v701433','v701434','v701435','v701436','v701437','v701438','v701439','v701440','v701441','v701442','v701443','v701444','v701445','v701446','v701447','v701448','v701449','v701450','v701451','v701452','v701453','v701454','v701455','v701456','v701457','v701458','v701459','v701460','v701461','v701462','v701463','v701464','v701465','v701466','v701467','v701468','v701469','v701470','v701471','v701472','v701473','v701474','v701475','v701476','v701477','v701478','v701479','v701480','v701481','v701482','v701483','v701484','v701485','v701486','v701487','v701488','v701489','v701490','v701491','v701492','v701493','v701494','v701495','v701496','v701497','v701498','v701499','v701500','v701501','v701502','v701503','v701504','v701505','v701506','v701507','v701508','v701509','v701510','v701511','v701512','v701513','v701514','v701515','v701516','v701517','v701518','v701519','v701520','v701521','v701522','v701523','v701524','v701525','v639923','v639924','v639925','v639926','v639927','v639928','v639929','v639930','v639931','v639932','v639933','v639934','v639935','v639936','v639937','v639938','v639939','v639940',
'v639941','v639942','v639943','v515408','v515409','v515410','v515412','v515413','v515414','v515415','v515416','v515417','v515418','v515419','v515420','v515421','v515422','v515423','v515424','v515425','v515426','v701542','v701543','v701548','v701557','v701558','v701559','v701560','v701561','v701562','v701563','v701564','v701565','v701566','v701567','v701568','v701569','v701570','v701571','v701572','v701573','v701574','v701575','v701576','v701577','v701578','v701579','v701580','v701581','v701582','v701583','v701584','v701585','v701586','v701587','v701588','v701589','v701590','v701591','v701592','v701593','v701594','v701595','v701596','v701597','v701598','v701599','v701600','v701601','v701602','v701603','v701604','v701605','v701606','v701607','v701608','v701609','v701610','v701611','v701612','v701613','v701614','v701615','v701616','v701617','v701618','v701619','v701620','v701621','v701622','v701623','v701624','v701625','v701626','v701627','v701628','v701629','v701630','v701661','v701664','v515427','v515428','v515429','v515430','v515432','v515433','v515434','v515435','v639944','v639945','v639946','v701845','v701846','v701847','v701848','v701849','v701850','v701851','v701852','v701853','v701854','v701855','v701856','v701859','v701860','v639947','v639948','v639949','v639950','v639951','v639952','v639953','v639954','v639955','v639956','v639957','v639958','v639959','v639960','v639961','v639962','v639963');'''
    _count = 0
    # Raw string: '\S' in a plain literal is an invalid escape sequence.
    url_id_pattern = re.compile(r'http://place.qyer.com/poi/(\S+?)/')
    for uid, city_id, b_c, p_c, c_c, q_url in MysqlSource(
            poi_ori_config, table_or_query=sql, size=10000, is_table=False, is_dict_cursor=False):
        _count += 1
        if q_url is None:
            # json_extract yields NULL when the url has no '$.qyer' member;
            # previously this crashed on ``None += '/'``.
            logger.warning("[skip: no qyer url][uid: {}]".format(uid))
            continue
        if not str(q_url).endswith('/'):
            q_url += '/'
        matches = url_id_pattern.findall(q_url)
        if not matches:
            # Previously an IndexError on ``[-1]`` aborted the whole run.
            logger.warning("[skip: unexpected qyer url][uid: {}][url: {}]".format(uid, q_url))
            continue
        q_url_id = matches[-1]
        bc_d = json.loads(b_c)
        pc_d = json.loads(p_c)
        cc_d = json.loads(c_c)
        # Bound up front so the miss branch never reads an undefined or
        # stale ``s_id`` left over from an earlier row.
        s_id = None
        res = g_dict.get(q_url_id)
        if res:
            bc, cc, pc, s_id = res
            bc_d['qyer'] = int(bc)
            cc_d['qyer'] = int(cc)
            pc_d['qyer'] = int(pc)
            print(uid, json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d), q_url_id, q_url, s_id)
        else:
            print('##' * 10)
            print(uid, json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d), q_url_id, q_url, s_id)
            print('##' * 10)
        logger.info("[count: {}]".format(_count))
        update_sql((json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d), uid))
def get_task():
    """Reset the tasks of every ``accor`` hotel uid, batch by batch.

    Streams the ``uid`` column of ``hotel_unid`` through ``MysqlSource`` and
    passes the uids to ``reset_task`` 5000 at a time. A trailing partial
    batch is flushed once the stream is exhausted.
    """
    sql = '''SELECT uid FROM hotel_unid WHERE source='accor';'''
    batch_size = 5000
    u_l = []
    _count = 0
    for line in MysqlSource(base_data_config, table_or_query=sql, size=10000,
                            is_table=False):
        _count += 1
        u_l.append(line[0])
        if len(u_l) == batch_size:
            reset_task(u_l)
            logger.info("[total: {}]".format(_count))
            # Start a fresh batch. Without this every later flush (and the
            # final one) would re-send all previously handled uids.
            u_l = []
    if u_l:
        reset_task(u_l)
def get_old_info_dict():
    """Load the stored qyer rows of ``poi_merge.attr`` into a lookup table.

    Returns:
        A ``defaultdict(dict)`` keyed by the row ``id``. For each
        ``(key_name, is_strict, num_check)`` entry of ``check_name`` the inner
        dict holds the raw column value when ``is_strict`` is truthy;
        otherwise it holds the ``is_legal`` result, which is reduced to a
        bool when ``num_check`` is truthy.
    """
    sql = '''SELECT id, source, name, name_en, map_info, address, plantocounts, beentocounts, ranking, grade, commentcounts, imgurl, introduction, opentime FROM poi_merge.attr WHERE source='qyer';'''
    old_info = defaultdict(dict)
    loaded = 0
    rows = MysqlSource(poi_ori_config, table_or_query=sql, size=5000,
                       is_table=False, is_dict_cursor=True)
    for row in rows:
        loaded += 1
        if loaded % 3000 == 0:
            logger.info("[load old data info][count: {}]".format(loaded))
        sid = row['id']
        for key_name, is_strict, num_check in check_name:
            if is_strict:
                # Strict fields keep the stored value untouched.
                old_info[sid][key_name] = row[key_name]
                continue
            legal_res = is_legal(row[key_name])
            if num_check:
                # Numeric fields count as present only when the value
                # converts to an int other than -1 or 0.
                try:
                    checked = int(legal_res) not in (-1, 0)
                except Exception:
                    checked = False
            else:
                checked = legal_res
            old_info[sid][key_name] = checked
    logger.info("[load old data info finished][count: {}]".format(loaded))
    return old_info
def detect():
    """Collect the square images recorded in ``shop_bucket_relation``.

    Reads the rows whose ``source`` is ``daodao``, ``machine`` or ``NULL``,
    whose ``use`` flag is 1 and whose ``pic_size`` is not ``'NULL'``, parses
    ``pic_size`` into a width and a height, and keeps the rows where both
    are equal.

    Returns:
        A ``pandas.DataFrame`` with the selected columns plus ``width`` and
        ``height``. Every kept row carries index label 0, exactly as the
        previous row-by-row append produced.
    """
    conn = create_engine(spider_data_tmp_str)
    # ``LIMIT 0`` fetches no rows: this query only supplies the column layout.
    table = pandas.read_sql(sql='''SELECT file_name, sid, url, pic_size, bucket_name, url_md5, pic_md5, `use`, source, status, date FROM shop_bucket_relation LIMIT 0;''', con=conn)
    table['width'] = ''
    table['height'] = ''
    sql = '''SELECT file_name, sid, url, pic_size, bucket_name, url_md5, pic_md5, `use`, source, status, date FROM shop_bucket_relation WHERE source IN ('daodao', 'machine', 'NULL') AND `use` = 1 AND pic_size!='NULL';'''
    # Gather one single-row frame per match and concatenate once at the end.
    # ``DataFrame.append`` no longer exists in pandas 2.x, and appending per
    # row copied the whole table on every match.
    frames = [table]
    _count = 0
    for line in MysqlSource(db_config=spider_data_tmp_config,
                            table_or_query=sql, size=1024, is_table=False,
                            is_dict_cursor=True):
        _count += 1
        if _count % 1024 == 0:
            print("now: {}".format(_count))
        # NOTE(review): eval() executes whatever text is stored in pic_size.
        # If that column can hold untrusted data, switch to
        # ast.literal_eval once the stored format is confirmed.
        width, height = eval(line['pic_size'])
        width = int(width)
        height = int(height)
        line['width'] = width
        line['height'] = height
        if width == height:
            frames.append(pandas.DataFrame([line]))
    return pandas.concat(frames)
def report():
    """Log which ``chat_attraction`` ids reference the same source POI.

    Each row's ``url`` column is parsed as JSON. For the ``qyer`` and
    ``daodao`` entries the source-side id is extracted from the URL, and the
    row ``id`` is grouped under ``(source, sid)``. Every group holding more
    than one id is logged, followed by the number of such groups.
    """
    query_sql = '''SELECT id, url FROM chat_attraction;'''
    # (source name, pattern extracting the source-side id from its URL).
    # Raw strings keep ``\s`` / ``\d`` from being read as (invalid) string
    # escapes; the pattern text itself is unchanged.
    sid_patterns = (
        ('qyer', re.compile(r'place.qyer.com/poi/([\s\S]+)/')),
        ('daodao', re.compile(r'-d(\d+)')),
    )
    union_dict = defaultdict(set)
    _count = 0
    for line in MysqlSource(poi_ori_config, table_or_query=query_sql,
                            size=10000, is_table=False, is_dict_cursor=True):
        _count += 1
        if _count % 10000 == 0:
            logger.debug("[now count: {}]".format(_count))
        _id = line['id']
        _url = line['url']
        if not _url:
            continue
        urls = json.loads(_url)
        for _source, pattern in sid_patterns:
            if _source not in urls:
                continue
            try:
                _sid = pattern.findall(urls[_source])[0]
            except (IndexError, TypeError):
                # Best effort: the URL carries no recognisable id
                # (IndexError) or the stored value is not a string
                # (TypeError) - skip this source for this row.
                continue
            union_dict[(_source, _sid)].add(_id)

    _count = 0
    for k, v in union_dict.items():
        if len(v) > 1:
            _count += 1
            logger.info(
                "[ source, sid : {} ][ can be merged uid : {} ]".format(k, v))
    logger.info("[total: {}]".format(_count))
def update_per_hotel_validation(self, env='test'):
    """Rebuild the validation rows for every ``hotel_unid`` record.

    Args:
        env: ``'test'`` reads through ``test_db``, ``'online'`` through
            ``online_db``.

    Raises:
        TypeError: if ``env`` is neither ``'test'`` nor ``'online'``.
        Exception: anything other than ``ReportException`` raised by
            ``self.get_content`` is logged and re-raised.
    """
    if env not in ('test', 'online'):
        raise TypeError("Unknown Env: {}".format(env))
    db_conf = test_db if env == 'test' else online_db

    sql = '''SELECT source, sid, uid, mid, name, name_en, hotel_url FROM hotel_unid LIMIT {},999999999999;'''.format(self.offset)
    batch = []
    rows = MysqlSource(db_conf, table_or_query=sql, size=10000,
                       is_table=False, is_dict_cursor=True)
    for row in rows:
        source = row['source']
        try:
            ret_data = self.get_content(source=source, line=row)
            if ret_data:
                batch.append(ret_data)
        except ReportException as r_exc:
            # Reportable problems are tallied per (message, type) and the
            # row is skipped.
            logger.warning("[report error][msg: {}]".format(str(r_exc)))
            self.report_dict[(str(r_exc), r_exc.type)] += 1
        except Exception as exc:
            logger.exception(
                msg="[make workload key has exception][source: {}]".format(
                    source),
                exc_info=exc)
            raise exc

        self.pre_offset += 1
        if len(batch) == 2000:
            # replace into validation data
            self.insert_data(batch)
            batch = []

    # replace into validation data (whatever is left, possibly nothing)
    self.insert_data(batch)
def get_task():
    """Pass every ``file_name`` of ``error_f_md5_file`` to ``update_sql``.

    Names are handed over in lists of 1000. The remainder (which may be an
    empty list) is always passed in a final call.
    """
    sql = '''SELECT file_name FROM error_f_md5_file;'''
    pending = []
    seen = 0
    rows = MysqlSource(poi_ori_config, table_or_query=sql, size=10000,
                       is_table=False, is_dict_cursor=False)
    for row in rows:
        seen += 1
        pending.append(row[0])
        if len(pending) != 1000:
            continue
        logger.info("[count: {}]".format(seen))
        update_sql(pending)
        pending = []
    update_sql(pending)
def task():
    """Write every in-use attraction image that has no detection record.

    Rows of ``poi_images`` arrive ordered by ``sid``, so the images of one
    poi are consecutive. They are collected into a set and, when the poi id
    changes, compared with ``get_poi_pic_detect``. Every image missing from
    that result is logged and written to ``/tmp/img_res_new`` as
    ``<poi_id>###<file_name>``.
    """
    query_sql = '''SELECT sid, file_name, bucket_name FROM poi_images WHERE source = 'online' AND bucket_name LIKE '%attr%' AND sid LIKE 'v%' AND `use` = 1 ORDER BY sid;'''

    def report_lost(out_file, poi_id, img_names):
        # Log and persist the images of one poi that were never detected.
        has_detected_pic_file = get_poi_pic_detect(poi_id)
        for i in img_names - has_detected_pic_file:
            logger.debug(
                "[img not detected][poi_id: {}][img: {}]".format(poi_id, i))
            out_file.write('{}###{}\n'.format(poi_id, i))

    old_poi_id = None
    img_name_set = set()
    _count = 0
    # The context manager closes (and flushes) the result file even when the
    # scan raises part-way through.
    with open('/tmp/img_res_new', mode='w') as result_f:
        for line in MysqlSource(poi_ori_config, table_or_query=query_sql,
                                size=10000, is_table=False,
                                is_dict_cursor=True):
            _count += 1
            if _count % 3000 == 0:
                logger.debug("[now count: {}]".format(_count))
            if 'attr' not in line['bucket_name'] and not line['sid'].startswith(
                    'v'):
                continue
            # First get the poi id
            poi_id = line['sid']
            # When the id changes, check the finished group and start anew
            if poi_id != old_poi_id:
                if old_poi_id is not None:
                    report_lost(result_f, old_poi_id, img_name_set)
                old_poi_id = poi_id
                img_name_set = set()
            file_name = line['file_name']
            img_name_set.add(file_name)

        # The last poi never sees an id change, so check it explicitly;
        # otherwise its images would be silently dropped from the report.
        if old_poi_id is not None:
            report_lost(result_f, old_poi_id, img_name_set)
def get_task():
    """Pass the ids of a fixed list of qyer detection tasks to ``update_sql``.

    All matching ``pic_detect_task.id`` values are read first, then handed
    to ``update_sql`` in lists of 1000. The remainder (which may be an empty
    list) is always passed in a final call.
    """
    sql = '''SELECT id FROM pic_detect_task WHERE poi_id IN ('qyer###558781','qyer###52728','qyer###54661','qyer###558134','qyer###558215','qyer###558233','qyer###558267','qyer###558285','qyer###558305','qyer###558448','qyer###558491','qyer###558547','qyer###558708','qyer###558745','qyer###558820','qyer###558879','qyer###56415','qyer###52640','qyer###52743','qyer###54680','qyer###558186','qyer###558217','qyer###558241','qyer###558269','qyer###558287','qyer###558309','qyer###558450','qyer###54688','qyer###558193','qyer###558218','qyer###558248','qyer###558272','qyer###558289','qyer###558318','qyer###558452','qyer###558502','qyer###558571','qyer###558717','qyer###558765','qyer###558844','qyer###558915','qyer###57716','qyer###558500','qyer###558565','qyer###558712','qyer###558759','qyer###558841','qyer###558907','qyer###56478','qyer###52736','qyer###54665','qyer###558182','qyer###558216','qyer###558240','qyer###558268','qyer###558286','qyer###558307','qyer###558449','qyer###558493','qyer###54804','qyer###558197','qyer###558223','qyer###558251','qyer###558276','qyer###558291','qyer###558420','qyer###558472','qyer###558506','qyer###558576','qyer###558721','qyer###558770','qyer###558862','qyer###558933','qyer###57721','qyer###54711','qyer###558194','qyer###558222','qyer###558249','qyer###558275','qyer###558290','qyer###558320','qyer###558462','qyer###558504','qyer###558559','qyer###558709','qyer###558750','qyer###558821','qyer###558881','qyer###56421','qyer###558572','qyer###558719','qyer###558768','qyer###558858','qyer###558929','qyer###57719','qyer###558875','qyer###485076','qyer###485000','qyer###118786','qyer###123633','qyer###1329647','qyer###122769','qyer###1414076','qyer###447276','qyer###415327','qyer###86734','qyer###86767','qyer###1207837','qyer###109110','qyer###1207832','qyer###485285','qyer###203099','qyer###105666','qyer###537021','qyer###1451741','qyer###35050','qyer###72483','qyer###82690','qyer###86904','qyer###98539','qyer###69988','qyer###72489','qyer###83266','qyer###87010','qyer###99646','qyer###202648','qyer###69966','qyer###72471','qyer###72491','qyer###84214','qyer###90126','qyer###84188','qyer###88709','qyer###99647','qyer###100230','qyer###106478','qyer###109768','qyer###109849','qyer###109860','qyer###1140829','qyer###116647','qyer###117620','qyer###1209574','qyer###122317','qyer###122455','qyer###123255','qyer###123767','qyer###125028','qyer###1447115','qyer###164229','qyer###102176','qyer###106827','qyer###109840','qyer###109852','qyer###113225','qyer###1163474','qyer###116651','qyer###117851','qyer###1209668','qyer###122337','qyer###122676','qyer###123673','qyer###124124','qyer###1321717','qyer###1454527','qyer###164679','qyer###102167','qyer###106724','qyer###109835','qyer###109850','qyer###111101','qyer###1140830','qyer###116648','qyer###117733','qyer###1209648','qyer###122332','qyer###122456','qyer###123346','qyer###124095','qyer###125610','qyer###1451260','qyer###164231','qyer###102169','qyer###106826','qyer###109838','qyer###109851','qyer###111516','qyer###116030','qyer###116650','qyer###117840','qyer###1209649','qyer###122336','qyer###122675','qyer###123348','qyer###124113','qyer###1321712','qyer###1451997','qyer###164652','qyer###181079','qyer###184915','qyer###202978','qyer###34984','qyer###38773','qyer###39787','qyer###41399','qyer###42506','qyer###42796','qyer###43168','qyer###45192','qyer###46181','qyer###48982','qyer###50242','qyer###51537','qyer###52503','qyer###181078','qyer###184896','qyer###202976','qyer###34964','qyer###38074','qyer###39676','qyer###41387','qyer###42207','qyer###42790','qyer###43147','qyer###45183','qyer###46129','qyer###48966','qyer###50064','qyer###51503','qyer###51876','qyer###181084','qyer###196701','qyer###204467','qyer###35069','qyer###38884','qyer###40533','qyer###41637','qyer###42519','qyer###42804','qyer###43426','qyer###45313','qyer###46272','qyer###49064','qyer###51436','qyer###51613','qyer###52523','qyer###181081','qyer###185944','qyer###203542','qyer###35061','qyer###38843','qyer###40518','qyer###41582','qyer###42509','qyer###42799','qyer###43308','qyer###45307','qyer###46231','qyer###49026','qyer###51186','qyer###51602','qyer###52518','qyer###53417','qyer###538515','qyer###54045','qyer###55213','qyer###55998','qyer###56407','qyer###59328','qyer###61793','qyer###61804','qyer###88358','qyer###94929','qyer###94951','qyer###95014','qyer###53479','qyer###538557','qyer###54172','qyer###55218','qyer###56003','qyer###580621','qyer###59332','qyer###61795','qyer###61806','qyer###88714','qyer###94936','qyer###94952','qyer###95015','qyer###53562','qyer###538719','qyer###54333','qyer###55233','qyer###56029','qyer###581083','qyer###59346','qyer###61798','qyer###61812','qyer###94904','qyer###94940','qyer###94956','qyer###95018','qyer###53538','qyer###538599','qyer###54232','qyer###55230','qyer###56008','qyer###580622','qyer###59339','qyer###61796','qyer###61811','qyer###94855','qyer###94938','qyer###94955','qyer###95017','qyer###123136','qyer###204725','qyer###94916','qyer###123302','qyer###566546','qyer###566263','qyer###568699','qyer###566315','qyer###1448695','qyer###201860','qyer###59125','qyer###73868','qyer###118188','qyer###86694','qyer###57004','qyer###201651','qyer###279997','qyer###428505','qyer###429488','qyer###429764','qyer###430995','qyer###431804','qyer###454679','qyer###459803','qyer###48114','qyer###48754','qyer###501134','qyer###502112','qyer###502038','qyer###545884','qyer###59502','qyer###82246','qyer###82914','qyer###84104','qyer###84342','qyer###84555','qyer###95023','qyer###98561','qyer###99217','qyer###51086','qyer###54023','qyer###545222','qyer###546012','qyer###61971','qyer###82247','qyer###82915','qyer###84105','qyer###84491','qyer###84944','qyer###95029','qyer###98569','qyer###99749','qyer###48453','qyer###501010','qyer###501566','qyer###503610','qyer###53426','qyer###544687','qyer###545800','qyer###581581','qyer###1146305','qyer###429684','qyer###430951','qyer###431567','qyer###454146','qyer###459362','qyer###47942','qyer###48666','qyer###501019','qyer###501568','qyer###50385','qyer###82203','qyer###82908','qyer###84102','qyer###84340','qyer###84539','qyer###84956','qyer###98554','qyer###99207','qyer###53513','qyer###544696','qyer###545861','qyer###59501','qyer###82204','qyer###82909','qyer###84103','qyer###84341','qyer###84553','qyer###86771','qyer###98558','qyer###99208','qyer###430488','qyer###545436','qyer###455435','qyer###546034','qyer###107362','qyer###108831','qyer###111494','qyer###1144714','qyer###1204856','qyer###1205859','qyer###1206848','qyer###1208031','qyer###122414','qyer###123915','qyer###1448381','qyer###1448740','qyer###1449249','qyer###1452521','qyer###105743','qyer###107346','qyer###108830','qyer###109732','qyer###1144125','qyer###119723','qyer###1205857','qyer###1206845','qyer###1208030','qyer###121124','qyer###123496','qyer###1448001','qyer###1448714','qyer###1449231','qyer###1452431','qyer###182976','qyer###204006','qyer###206337','qyer###207792','qyer###39584','qyer###41566','qyer###452383','qyer###452462','qyer###45601','qyer###48602','qyer###513149','qyer###55168','qyer###62009','qyer###66090','qyer###66100','qyer###200638','qyer###204108','qyer###206341','qyer###207793','qyer###40316','qyer###41604','qyer###452393','qyer###452472','qyer###45685','qyer###48612','qyer###513155','qyer###55625','qyer###62143','qyer###66091','qyer###66101','qyer###81521','qyer###81520','qyer###81528','qyer###84928','qyer###89158','qyer###89171','qyer###89373','qyer###94790','qyer###81529','qyer###84929','qyer###89164','qyer###89172','qyer###89374','qyer###94803','qyer###107332','qyer###108821','qyer###108909','qyer###1144121','qyer###119661','qyer###1205785','qyer###120660','qyer###1208029','qyer###1209153','qyer###123487','qyer###1447998','qyer###1448704','qyer###1449207','qyer###1452383','qyer###106682','qyer###108813','qyer###108908','qyer###1139484','qyer###119658','qyer###1205708','qyer###120658','qyer###1208028','qyer###1209127','qyer###123479','qyer###1446211','qyer###1448532','qyer###1449206','qyer###1451992','qyer###164736','qyer###203521','qyer###206336','qyer###207791','qyer###35544','qyer###41485','qyer###452375','qyer###452457','qyer###452493','qyer###48549','qyer###513133','qyer###53688','qyer###61826','qyer###66085','qyer###66099','qyer###81519','qyer###164734','qyer###202749','qyer###206095','qyer###207790','qyer###34616','qyer###41481','qyer###452370','qyer###452450','qyer###452490','qyer###47367','qyer###513130','qyer###513199','qyer###61825','qyer###66084','qyer###66097','qyer###66167','qyer###81527','qyer###84926','qyer###89157','qyer###89170','qyer###89372','qyer###89786','qyer###81526','qyer###84925','qyer###89155','qyer###89169','qyer###89250','qyer###89621','qyer###99414','qyer###103453','qyer###107466','qyer###34482','qyer###1448825','qyer###581276','qyer###84933','qyer###104804','qyer###108004','qyer###1144553','qyer###1149827','qyer###116590','qyer###117174','qyer###117183','qyer###117187','qyer###117191','qyer###117210','qyer###1201726','qyer###1201865','qyer###1207499','qyer###1211088','qyer###1211486','qyer###1211888','qyer###106229','qyer###1101051','qyer###1144554','qyer###1161161','qyer###117170','qyer###117178','qyer###117184','qyer###117188','qyer###117193','qyer###117212','qyer###1201731','qyer###1201871','qyer###1208371','qyer###1211414','qyer###1211636','qyer###1211993','qyer###106231','qyer###112106','qyer###1145607','qyer###1161162','qyer###117171','qyer###117181','qyer###117185','qyer###117189','qyer###117195','qyer###119213','qyer###1201796','qyer###1201874','qyer###1209783','qyer###1211427','qyer###1211662','qyer###1212093','qyer###107358','qyer###1144549','qyer###1145794','qyer###116580','qyer###117173','qyer###117182','qyer###117186','qyer###117190','qyer###117207','qyer###1201604','qyer###1201852','qyer###1205949','qyer###1210091','qyer###1211473','qyer###1211803','qyer###1212095','qyer###1219810','qyer###123549','qyer###124298','qyer###1253756','qyer###1266027','qyer###1452553','qyer###1453565','qyer###14605','qyer###14625','qyer###14631','qyer###164303','qyer###164437','qyer###189225','qyer###201127','qyer###202276','qyer###202901','qyer###121896','qyer###1234685','qyer###1236200','qyer###1252732','qyer###1266011','qyer###1451214','qyer###1452742','qyer###14595','qyer###14615','qyer###14629','qyer###163988','qyer###164432','qyer###189196','qyer###201125','qyer###202272','qyer###202279','qyer###1234660','qyer###1236156','qyer###1251885','qyer###1265056','qyer###1450706','qyer###1452582','qyer###1454918','qyer###14614','qyer###14627','qyer###163986','qyer###164391','qyer###175388','qyer###201124','qyer###202260','qyer###202278','qyer###204859','qyer###122277','qyer###1235915','qyer###124654','qyer###126377','qyer###1446115','qyer###1452560','qyer###1454917','qyer###14608','qyer###14626','qyer###163984','qyer###164390','qyer###165080','qyer###200660','qyer###202254','qyer###202277','qyer###203338','qyer###204861','qyer###33734','qyer###34156','qyer###34535','qyer###36456','qyer###37785','qyer###39251','qyer###43218','qyer###43392','qyer###481770','qyer###481809','qyer###482061','qyer###482319','qyer###482340','qyer###482382','qyer###482846','qyer###204860','qyer###33732','qyer###33751','qyer###34454','qyer###36274','qyer###37349','qyer###39040','qyer###43041','qyer###43390','qyer###45622','qyer###481793','qyer###482040','qyer###482176','qyer###482336','qyer###482380','qyer###482563','qyer###205162','qyer###33750','qyer###34332','qyer###35432','qyer###37236','qyer###38229','qyer###39885','qyer###43257','qyer###45506','qyer###481779','qyer###482019','qyer###482104','qyer###482331','qyer###482376','qyer###482510','qyer###483289','qyer###205161','qyer###33744','qyer###34165','qyer###35231','qyer###36524','qyer###38200','qyer###39356','qyer###43253','qyer###44190','qyer###481776','qyer###481883','qyer###482074','qyer###482324','qyer###482366','qyer###482479','qyer###483256','qyer###483520','qyer###497111','qyer###497405','qyer###497466','qyer###497753','qyer###497855','qyer###498042','qyer###51761','qyer###52428','qyer###52995','qyer###53728','qyer###54096','qyer###54128','qyer###54161','qyer###54636','qyer###54961','qyer###496599','qyer###497117','qyer###497413','qyer###497557','qyer###497805','qyer###497870','qyer###498270','qyer###51805','qyer###52460','qyer###53009','qyer###54069','qyer###54101','qyer###54133','qyer###54169','qyer###54892','qyer###54976','qyer###55066','qyer###55731','qyer###55767','qyer###56545','qyer###56704','qyer###57136','qyer###580659','qyer###59148','qyer###61830','qyer###62225','qyer###73619','qyer###497107','qyer###497197','qyer###497425','qyer###497745','qyer###497837','qyer###497976','qyer###51744','qyer###52401','qyer###52767','qyer###53047','qyer###54093','qyer###54123','qyer###54153','qyer###54223','qyer###54956','qyer###55045','qyer###497101','qyer###497164','qyer###497415','qyer###497728','qyer###497810','qyer###497909','qyer###498273','qyer###52336','qyer###52646','qyer###53038','qyer###54086','qyer###54111','qyer###54138','qyer###54181','qyer###54941','qyer###54999','qyer###55388','qyer###55743','qyer###55797','qyer###56676','qyer###56720','qyer###57347','qyer###59038','qyer###61827','qyer###61831','qyer###62227','qyer###73620','qyer###55714','qyer###55755','qyer###56511','qyer###56688','qyer###57083','qyer###57594','qyer###59055','qyer###61829','qyer###61988','qyer###73458','qyer###99574','qyer###55531','qyer###55750','qyer###55826','qyer###56682','qyer###56725','qyer###57373','qyer###59044','qyer###61828','qyer###61832','qyer###73441','qyer###85387','qyer###115578','qyer###115662','qyer###50170');'''
    rows = MysqlSource(poi_ori_config, table_or_query=sql, size=10000,
                       is_table=False, is_dict_cursor=True)
    task_ids = [row['id'] for row in rows]

    chunk = 1000
    full_chunks = len(task_ids) // chunk
    for index in range(full_chunks):
        update_sql(task_ids[index * chunk:(index + 1) * chunk])
    # The remainder is always submitted, even when it is empty.
    update_sql(task_ids[full_chunks * chunk:])
def _delete_already_scanned_file():
    """Delete the file of every picture row flagged ``is_scaned = 1``.

    The query skips the first ``offset`` rows, and the module-level
    ``offset`` is advanced once per row handled. A failing ``delete_file``
    call is logged and does not stop the run.
    """
    global offset
    sql = '''SELECT pic_name FROM PoiPictureInformation WHERE is_scaned = 1 LIMIT {}, 999999999999;'''.format(offset)
    handled = 0
    rows = MysqlSource(devdb_config, table_or_query=sql, size=10000,
                       is_table=False, is_dict_cursor=True)
    for row in rows:
        f_path = row['pic_name']
        try:
            delete_file(f_path=f_path)
        except Exception as exc:
            logger.exception(
                msg="[delete file exception][f_path: {}]".format(f_path),
                exc_info=exc)
        # Progress is reported against the global offset, checked before the
        # offset is advanced for this row.
        if offset % 10000 == 0:
            logger.info("[delete file count][offset: {}]".format(offset))
        handled += 1
        offset += 1
def get_task():
    """Refresh the ``qyer`` counters stored for every ``test_result_2`` row.

    For each row the qyer id is extracted from ``qyer_url``, and the three
    JSON counter columns are decoded with their ``qyer`` entry dropped. When
    ``generate_qyer_url_id()`` knows that id, the ``qyer`` entries are set
    from its values. The three re-encoded dicts plus the row id are then
    passed to ``update_sql``, and each row is printed for inspection.
    """
    g_dict = generate_qyer_url_id()
    sql = '''SELECT id, beentocount, plantocount, commentcount, qyer_url FROM test_result_2;'''
    # Raw string: ``\S`` must reach the regex engine instead of being read
    # as an (invalid) string escape. Compiled once, outside the row loop.
    qyer_id_pattern = re.compile(r'http://place.qyer.com/poi/(\S+?)/')
    _count = 0
    for uid, b_c, p_c, c_c, q_url in MysqlSource(poi_ori_config,
                                                 table_or_query=sql,
                                                 size=10000, is_table=False,
                                                 is_dict_cursor=False):
        _count += 1
        if not str(q_url).endswith('/'):
            q_url += '/'
        q_url_id = qyer_id_pattern.findall(q_url)[-1]
        # pop() instead of del: a row whose counters lack a 'qyer' entry
        # must not abort the whole run with KeyError.
        bc_d = json.loads(b_c)
        bc_d.pop('qyer', None)
        pc_d = json.loads(p_c)
        pc_d.pop('qyer', None)
        cc_d = json.loads(c_c)
        cc_d.pop('qyer', None)
        res = g_dict.get(q_url_id)
        if res:
            bc, cc, pc = res
            bc_d['qyer'] = int(bc)
            cc_d['qyer'] = int(cc)
            pc_d['qyer'] = int(pc)
            print(uid, json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d),
                  q_url_id, q_url)
        else:
            # Unknown qyer id: highlight the row in the output.
            print('##' * 10)
            print(uid, json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d),
                  q_url_id, q_url)
            print('##' * 10)
        logger.info("[count: {}]".format(_count))
        update_sql((json.dumps(bc_d), json.dumps(pc_d), json.dumps(cc_d), uid))
def hotel_unid_sid_set(source):
    """Return the set of ``sid`` values stored in ``hotel_unid`` for a source.

    Args:
        source: value matched against the ``source`` column; it is formatted
            directly into the query text.

    Returns:
        A ``set`` holding the first column of every row read.
    """
    query_sql = '''SELECT sid FROM hotel_unid WHERE source = '{}';'''.format(source)
    sids = set()
    count = 0
    rows = MysqlSource(base_data_config, table_or_query=query_sql,
                       size=10000, is_table=False, is_dict_cursor=False)
    for count, row in enumerate(rows, 1):
        if count % 10000 == 0:
            logger.info("[prepare unid sid][source: {}][count: {}]".format(
                source, count))
        sids.add(row[0])
    # Final tally, logged whether or not any rows were read.
    logger.info("[prepare unid sid][source: {}][count: {}]".format(
        source, count))
    return sids
def hotel_detail_sid_set(source, tag):
    """Return the ``source_id`` values of one ``detail_hotel_*`` table.

    Args:
        source: first part of the table suffix.
        tag: second part of the table suffix; the table read is
            ``detail_hotel_<source>_<tag>``.

    Returns:
        A ``set`` holding the first column of every row read.
    """
    table_name = 'detail_hotel_{}_{}'.format(source, tag)
    query_sql = '''SELECT source_id FROM {};'''.format(table_name)
    source_ids = set()
    count = 0
    rows = MysqlSource(service_platform_config, table_or_query=query_sql,
                       size=10000, is_table=False, is_dict_cursor=False)
    for count, row in enumerate(rows, 1):
        if count % 10000 == 0:
            logger.info(
                "[prepare detail sid][table_name: {}][count: {}]".format(
                    table_name, count))
        source_ids.add(row[0])
    # Final tally, logged whether or not any rows were read.
    logger.info("[prepare detail sid][table_name: {}][count: {}]".format(
        table_name, count))
    return source_ids