def load_all_lsmc():
    """Queue every historical company name for (re-)crawling.

    Reads all names (``mc``) from
    ``enterprise_credit_info.dtjk_company_src_history`` and, for each one:

    * inserts it into ``law.dtjk_company_src`` when it is not there yet,
      otherwise resets its ``update_status`` to -1;
    * ``replace``s it into ``law.dtjk_company_shixin``,
      ``law.dtjk_company_beizhixing`` and ``news.news_lsmc``.

    The column values written for each row are exactly the ones spelled out
    in the statements below.  Nothing is returned.
    """
    def _as_text(value):
        # Presumably the MySQL helper returns UTF-8 byte strings (the history
        # names were always ``.decode``d); decode those and pass anything
        # else (already-decoded text, NULL) through untouched.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    # Names already present in law.dtjk_company_src.  They have to be
    # normalised the same way as the history names below: a decoded name
    # never equals its raw non-ASCII byte-string twin, so without this every
    # existing company would be treated as new and inserted again.
    known_names = set(
        _as_text(row[0])
        for row in MySQL.execute_query("select mc from law.dtjk_company_src")
    )

    insert_params = []
    update_params = []
    replace_params = []
    history_rows = MySQL.execute_query(
        "select mc from enterprise_credit_info.dtjk_company_src_history")
    for row in history_rows:
        mc = _as_text(row[0])
        if mc is None:
            # A NULL name cannot be queued for a search.
            continue
        if mc in known_names:
            update_params.append((mc,))
        else:
            insert_params.append((mc,))
        replace_params.append((mc,))

    batches = (
        (u"insert into law.dtjk_company_src values(%s,-1,null,null,'曾用名')",
         insert_params),
        (u"update law.dtjk_company_src set update_status=-1 where mc=%s",
         update_params),
        (u"replace into law.dtjk_company_shixin values(%s,-1,null,'曾用名')",
         replace_params),
        (u"replace into law.dtjk_company_beizhixing values(%s,-1,null,'曾用名')",
         replace_params),
        (u"replace into news.news_lsmc values(%s,-1,null,date(now()))",
         replace_params),
    )
    for statement, params in batches:
        # Skip empty batches instead of handing an empty list to the driver.
        if params:
            MySQL.execute_many_update(statement, params)
def load_company_test():
    """Sync monitored companies from ``v_monitorcompany`` into the crawl queue.

    Streams ``CompanyName``/``AreaName`` from the ``ljzxdb`` database in
    batches of 1000 and compares every usable name (not purely digits, longer
    than one character) with
    ``enterprise_credit_info.dtjk_company_src_test``:

    * unknown names are inserted with ``update_status=-1``;
    * known names are reset to ``update_status=-1`` unless they are already
      -1, or were updated today with status 0 or 1.

    Afterwards missing ``xydm`` values are filled in from ``yyzz``.
    Nothing is returned.
    """
    config = {
        'host': '172.16.0.102',
        'user': '******',
        'password': '******',
        'port': 3306,
        'database': 'ljzxdb',
        'charset': 'utf8mb4',
    }
    batch_size = 1000
    insert_sql = u"insert into enterprise_credit_info.dtjk_company_src_test " \
                 u"values(%s,-1,null,'动态监控测试',%s,null)"
    update_sql = u"update enterprise_credit_info.dtjk_company_src_test " \
                 u"set update_status=-1 where mc=%s"

    connection = mysql.connector.connect(**config)
    try:
        cursor = connection.cursor()
        cursor.execute("select CompanyName,AreaName from v_monitorcompany")

        # Snapshot of the queue table, keyed by company name, so that the
        # loop below needs no per-company query.
        last_update_date_dict = {}
        update_status_dict = {}
        sql_1 = "select mc,date(last_update_time),update_status " \
                "from enterprise_credit_info.dtjk_company_src_test"
        for row in MySQL.execute_query(sql_1):
            last_update_date_dict[row[0]] = row[1]
            update_status_dict[row[0]] = row[2]

        while True:
            res = cursor.fetchmany(batch_size)
            insert_args = []
            update_args = []
            for row in res:
                mc = row[0]
                province = row[1]
                if mc is None:
                    # A NULL company name has nothing to crawl.
                    continue
                info(mc)
                if mc.isdigit() or len(mc) <= 1:
                    continue
                if mc not in update_status_dict:
                    insert_args.append((mc, province))
                    continue
                status = update_status_dict[mc]
                done_today = (last_update_date_dict[mc] == datetime.date.today()
                              and status in (0, 1))
                # Re-queue unless it is already queued (-1) or done today.
                if not (done_today or status == -1):
                    update_args.append((mc,))
            if insert_args:
                MySQL.execute_many_update(insert_sql, insert_args)
            if update_args:
                MySQL.execute_many_update(update_sql, update_args)
            if len(res) < batch_size:
                # A short batch means the cursor is exhausted.
                break
        cursor.close()
    finally:
        # Always release the connection, also when a batch fails half-way.
        # Only the connection is closed here: closing a cursor that still has
        # unread rows may itself raise and hide the original error.
        connection.close()

    sql_3 = '''
        UPDATE dtjk_company_src_test d, yyzz y
        SET d.xydm = y.xydm
        WHERE d.xydm is null and d.mc = y.mc;
    '''
    MySQL.execute_update(sql_3)
def run(self):
    """Crawl the queued companies of ``self.province`` until none are left.

    Each pass picks one random row out of up to 30 rows of ``self.src_table``
    that have ``update_status=-1`` and the worker's province, sets it to -2
    before handling it, calls ``self.searcher.crawl`` and stores the returned
    status together with ``last_update_time=now()``.  If the crawl raises,
    the traceback is printed, the row is put back to -1 and
    ``self.searcher.delete_tag_a_from_db`` is called for it.

    Running totals (status 1 / status 0 / anything else) are logged after
    every company.  The method returns once no -1 row is found.
    """
    # NOTE(review): table, province and company names are interpolated
    # straight into the SQL text; a name containing a quote breaks the
    # statement.  Switch to bound parameters if the MySQL helper supports them.
    finish_sql = "update %s set update_status=%d, last_update_time=now() " \
                 "where mc='%s'"
    cnt_found = 0
    cnt_empty = 0
    cnt_failed = 0
    while True:
        pick_sql = (
            "select mc,province from "
            "("
            "select * from %s where update_status=-1 "
            "and province='%s' "
            "limit 30 "
            ") t "
            "order by RAND() limit 1 "
        ) % (self.src_table, self.province)
        rows = MySQL.execute_query(pick_sql)
        if len(rows) == 0:
            self.info(u'更新完毕')
            break

        mc, province = rows[0][0], rows[0][1]
        self.info(mc + '|' + province)
        # Flag the row before the slow crawl starts.
        MySQL.execute_update(
            "update %s set update_status=-2 where mc='%s'"
            % (self.src_table, mc))
        try:
            update_status = self.searcher.crawl(keyword=mc, province=province)
            # Formatted inside the ``try`` on purpose: a non-numeric status
            # makes ``%d`` raise and is then treated like a failed crawl.
            sql_3 = finish_sql % (self.src_table, update_status, mc)
        except Exception as e:
            # print_exc() takes (limit, file) and always reports the exception
            # being handled; the exception object must not be passed to it.
            traceback.print_exc()
            update_status = -1
            self.info(str(e))
            sql_3 = finish_sql % (self.src_table, update_status, mc)
            self.searcher.delete_tag_a_from_db(mc, province)
        MySQL.execute_update(sql_3)

        if update_status == 0:
            cnt_empty += 1
        elif update_status == 1:
            cnt_found += 1
        else:
            cnt_failed += 1
        self.info(u'查询有结果: %d, 查询无结果: %d, 查询失败:%d'
                  % (cnt_found, cnt_empty, cnt_failed))
def run(self):
    """Submit a search request for every queued company until none are left.

    Each pass picks one random row out of up to 30 rows of ``self.src_table``
    with ``update_status=-1``, sets it to -2 before handling it, calls
    ``self.searcher.submit_search_request`` and stores the returned status
    together with ``last_update_time=now()``.  If the call raises, the
    traceback and the message are logged and the row is put back to -1.

    Running totals (status 1 / status 0 / anything else) are logged after
    every company.  The method returns once no -1 row is found.
    """
    # NOTE(review): the table and company names are interpolated straight
    # into the SQL text; a name containing a quote breaks the statement.
    # Switch to bound parameters if the MySQL helper supports them.
    finish_sql = "update %s set update_status=%d, last_update_time=now() " \
                 "where mc='%s'"
    cnt_found = 0
    cnt_empty = 0
    cnt_failed = 0
    while True:
        pick_sql = (
            "select mc,province from "
            "("
            "select * from %s where update_status=-1 limit 30 "
            ") t "
            "order by RAND() limit 1"
        ) % self.src_table
        rows = MySQL.execute_query(pick_sql)
        if len(rows) == 0:
            self.info(u'更新完毕')
            break

        mc = rows[0][0]
        # Flag the row before the request is submitted.
        MySQL.execute_update(
            "update %s set update_status=-2 where mc='%s'"
            % (self.src_table, mc))
        try:
            update_status = self.searcher.submit_search_request(keyword=mc)
            # Formatted inside the ``try`` on purpose: a non-numeric status
            # makes ``%d`` raise and is then treated like a failed request.
            sql_3 = finish_sql % (self.src_table, update_status, mc)
        except Exception as e:
            # format_exc() takes a ``limit``, not the exception; it always
            # formats the exception that is currently being handled.
            self.info(traceback.format_exc())
            update_status = -1
            self.info(str(e))
            sql_3 = finish_sql % (self.src_table, update_status, mc)
        MySQL.execute_update(sql_3)

        if update_status == 0:
            cnt_empty += 1
        elif update_status == 1:
            cnt_found += 1
        else:
            cnt_failed += 1
        self.info(u'查询有结果: %d, 查询无结果: %d, 查询失败:%d'
                  % (cnt_found, cnt_empty, cnt_failed))
def load_dtjk_nb_test():
    """Queue monitored companies that have no 2016 annual report stored yet.

    Selects the companies in ``v_monitorcompany`` whose ``MonitorDate`` is
    today, yesterday or the day before yesterday, each moved back one year.
    For every usable name (not purely digits, longer than one character) that
    has no ``ancheyear='2016'`` row in ``nb``, ``gtnb`` or ``sfcnb``, a row
    with ``update_status=-1`` and the monitor date moved forward one year is
    ``replace``d into ``enterprise_credit_info.dtjk_company_src_nb``.
    Nothing is returned.
    """
    def _shift_year(day, delta):
        # ``day`` starts with a 4-digit year; only that prefix is changed.
        return str(int(day[:4]) + delta) + day[4:]

    config = {
        'host': '172.16.0.102',
        'user': '******',
        'password': '******',
        'port': 3306,
        'database': 'ljzxdb',
        'charset': 'utf8mb4',
    }
    sql = "select CompanyName,MonitorDate from v_monitorcompany " \
          "where MonitorDate in ('%s','%s','%s')" % (
              _shift_year(TimeUtils.get_today(), -1),
              _shift_year(TimeUtils.get_yesterday(), -1),
              _shift_year(TimeUtils.get_the_day_before_yesterday(), -1),
          )

    connection = mysql.connector.connect(**config)
    try:
        cursor = connection.cursor()
        cursor.execute(sql)
        res = cursor.fetchall()
        cursor.close()
    finally:
        # The rows are fully fetched at this point; release the connection
        # whether or not the query succeeded.
        connection.close()

    # NOTE(review): company names are interpolated straight into the SQL
    # text below; a name containing a quote breaks the statement.  Switch to
    # bound parameters if the MySQL helper supports them.
    for row in res:
        mc = row[0]
        monitor_date = _shift_year(str(row[1]), 1) + ' 00:00:00'
        if mc.isdigit() or len(mc) <= 1:
            continue
        sql_2 = u"""select last_update_time from enterprise_credit_info.nb
                    where entname='%s' and ancheyear='2016'
                    union all
                    select last_update_time from enterprise_credit_info.gtnb
                    where traName='%s' and ancheyear='2016'
                    union all
                    select last_update_time from enterprise_credit_info.sfcnb
                    where farSpeArtName='%s' and ancheyear='2016'
                    """ % (mc, mc, mc)
        res_2 = MySQL.execute_query(sql_2)
        if len(res_2) == 0:
            sql_3 = u"replace into enterprise_credit_info.dtjk_company_src_nb" \
                    u"(mc,update_status,last_update_time) " \
                    u"values('%s',-1,'%s')" % (mc, monitor_date)
            MySQL.execute_update(sql_3)
def run(self):
    """Crawl queued companies, searching by ``xydm`` where possible.

    Each pass takes the 30 rows of ``self.src_table`` with
    ``update_status=-1`` that have the oldest ``last_update_time``, picks one
    of them at random and sets it to -2 (with ``last_update_time=now()``)
    before handling it.

    * Province not in ``self.online_province``: the status becomes 999
      without crawling.
    * Otherwise ``self.searcher.crawl`` is called.  The keyword is ``xydm``
      when the province is one of the listed ones and ``check(xydm)`` is
      true, else the company name.
    * If anything in that step raises, the traceback is logged,
      ``self.searcher.delete_tag_a_from_db`` is called and the row goes back
      to -1.  Once a company has more than 10 recorded failures in this run
      it is stored as status 3 instead.

    A JSON summary of the outcome counters is logged after every company.
    The method returns once no -1 row is found.
    """
    # NOTE(review): the table and company names are interpolated straight
    # into the SQL text; a name containing a quote breaks the statement.
    # Switch to bound parameters if the MySQL helper supports them.
    success_sql = "update %s set update_status=%d, last_update_time=now() " \
                  "where mc='%s'"
    failure_sql = "update %s set update_status=%d where mc='%s'"
    # Provinces for which ``xydm`` is used as the search keyword.
    xydm_provinces = (
        u'河北省',
        u'宁夏回族自治区',
        u'河南省',
        u'海南省',
        u'重庆市',
        u'江西省',
        u'贵州省',
        u'湖南省',
        u'陕西省',
        u'山西省',
        u'黑龙江省',
        u'吉林省',
        u'内蒙古自治区',
        u'广西壮族自治区',
        u'云南省',
        u'西藏自治区',
        u'青海省',
        u'新疆维吾尔自治区',
        u'甘肃省',
        u'工商总局',
        u'浙江省',
        u'江苏省',
        u'广东省',
        u'上海市',
    )
    fail_dict = dict()  # company name -> failures recorded so far
    update_result = {u'更新成功': 0, u'查无结果': 0, u'更新失败': 0, u'未上线': 0}
    while True:
        pick_sql = (
            "select mc,province,xydm from "
            "("
            "select * from %s where update_status=-1 order by last_update_time limit 30 "
            ") t "
            "order by RAND() limit 1"
        ) % self.src_table
        rows = MySQL.execute_query(pick_sql)
        if len(rows) == 0:
            self.info(u'更新完毕')
            break

        mc, province, xydm = rows[0][0], rows[0][1], rows[0][2]
        self.info(mc + '|' + province)
        # Flag the row and refresh its timestamp before the crawl starts.
        MySQL.execute_update(
            "update %s set update_status=-2,last_update_time=now() "
            "where mc='%s'" % (self.src_table, mc))
        try:
            if province in self.online_province:
                if province in xydm_provinces and check(xydm):
                    keyword = xydm
                else:
                    keyword = mc
                update_status = self.searcher.crawl(keyword=keyword,
                                                    province=province)
            else:
                update_status = 999
            # Formatted inside the ``try`` on purpose: a non-numeric status
            # makes ``%d`` raise and is then treated like a failed crawl.
            sql_3 = success_sql % (self.src_table, update_status, mc)
            fail_dict.pop(mc, None)
        except Exception as e:
            # format_exc() takes a ``limit``, not the exception; it always
            # formats the exception that is currently being handled.
            self.info(traceback.format_exc())
            if fail_dict.get(mc, 0) > 10:
                update_status = 3
                fail_dict.pop(mc, None)
            else:
                update_status = -1
                fail_dict[mc] = fail_dict.get(mc, 0) + 1
            sql_3 = failure_sql % (self.src_table, update_status, mc)
            self.searcher.delete_tag_a_from_db(mc, province)
        MySQL.execute_update(sql_3)

        if update_status == 0:
            update_result[u'查无结果'] += 1
        elif update_status == 1:
            update_result[u'更新成功'] += 1
        elif update_status == 999:
            update_result[u'未上线'] += 1
        else:
            update_result[u'更新失败'] += 1
        self.info(json.dumps(update_result, ensure_ascii=False))
def run(self):
    """Crawl queued companies forever, sleeping when the queue is empty.

    Each pass takes the 30 rows of ``self.src_table`` with
    ``update_status=-1`` that have the oldest ``last_update_time``, picks one
    of them at random, decodes its name and province from UTF-8 and sets the
    row to -2 (with ``last_update_time=now()``) before handling it.

    * Province not in ``self.online_province``: the status becomes 999
      without crawling.
    * Otherwise the status is whatever ``self.searcher.crawl`` returns.
    * If anything in that step raises, the traceback is logged,
      ``self.searcher.delete_tag_a_from_db`` is called and the row goes back
      to -1.  Once a company has more than 10 recorded failures in this run
      it is stored as status 3 instead.

    A JSON summary of the outcome counters is logged after every company.
    When no -1 row is found the method logs that, sleeps five minutes and
    polls again; it never returns on its own.
    """
    # NOTE(review): the table and company names are interpolated straight
    # into the SQL text; a name containing a quote breaks the statement.
    # Switch to bound parameters if the MySQL helper supports them.
    success_sql = "update %s set update_status=%d, last_update_time=now() " \
                  "where mc='%s'"
    failure_sql = "update %s set update_status=%d where mc='%s'"
    fail_dict = dict()  # company name -> failures recorded so far
    update_result = {u'更新成功': 0, u'查无结果': 0, u'更新失败': 0, u'未上线': 0}
    while True:
        pick_sql = (
            "select mc,province from "
            "("
            "select * from %s where update_status=-1 order by last_update_time limit 30 "
            ") t "
            "order by RAND() limit 1"
        ) % self.src_table
        rows = MySQL.execute_query(pick_sql)
        if len(rows) == 0:
            self.info(u'更新完毕')
            time.sleep(5 * 60)
            continue

        mc = rows[0][0].decode('utf-8')
        province = rows[0][1].decode('utf-8')
        self.info(mc + '|' + province)
        # Flag the row and refresh its timestamp before the crawl starts.
        MySQL.execute_update(
            "update %s set update_status=-2,last_update_time=now() "
            "where mc='%s'" % (self.src_table, mc))
        try:
            if province in self.online_province:
                update_status = self.searcher.crawl(keyword=mc,
                                                    province=province)
            else:
                update_status = 999
            # Formatted inside the ``try`` on purpose: a non-numeric status
            # makes ``%d`` raise and is then treated like a failed crawl.
            sql_3 = success_sql % (self.src_table, update_status, mc)
            fail_dict.pop(mc, None)
        except Exception as e:
            # format_exc() takes a ``limit``, not the exception; it always
            # formats the exception that is currently being handled.
            self.info(traceback.format_exc())
            if fail_dict.get(mc, 0) > 10:
                update_status = 3
                fail_dict.pop(mc, None)
            else:
                update_status = -1
                fail_dict[mc] = fail_dict.get(mc, 0) + 1
            sql_3 = failure_sql % (self.src_table, update_status, mc)
            self.searcher.delete_tag_a_from_db(mc, province)
        MySQL.execute_update(sql_3)

        if update_status == 0:
            update_result[u'查无结果'] += 1
        elif update_status == 1:
            update_result[u'更新成功'] += 1
        elif update_status == 999:
            update_result[u'未上线'] += 1
        else:
            update_result[u'更新失败'] += 1
        self.info(json.dumps(update_result, ensure_ascii=False))