def start_thread(kw):
    """Run the worker threads of one business module and supervise them.

    Registers the 'default' database, imports the module named by
    ``kw['mod']``, starts ``thread_count`` ExecThread workers on it and then
    loops: refilling the proxy queue, restarting dead workers, periodically
    logging the module's counters, and shutting down once the exit flag is set.

    Keys read from ``kw``:
        thread_count: popped; number of workers (defaults to THREAD_COUNT).
        biz_flag:     popped; must be a key of ``datamodel.SF_DIST``.
        mod:          popped; name of the module to import.
        exit_flag:    read; object whose ``is_set()`` requests shutdown.
        urls:         read; optional mapping of attribute name -> value that
                      is copied onto the imported module.

    Raises:
        KeyError: if 'biz_flag' or 'mod' is missing from ``kw``.
        Exception: "illegal biz_flag!" when biz_flag is not in SF_DIST, or
            the original error when the module could not be imported or no
            worker could be created.
    """
    # Register the database.
    mysqlwrap.setup_db('default', config.CONFIG['mysqld'])
    # Start the connection-pool monitor.
    mysqlwrap.pool_monitor()

    thread_count = kw.pop('thread_count', THREAD_COUNT)
    biz_flag = kw.pop('biz_flag')
    exit_flag = kw.get('exit_flag')
    urls = kw.get('urls', '')
    if biz_flag not in datamodel.SF_DIST:
        raise Exception("illegal biz_flag!")
    mod = kw.pop('mod')

    # Set the process title.
    title = "%s %s %s" % (datamodel.proc_title, mod, biz_flag)
    setproctitle.setproctitle(title)
    logger.info("%s start..." % title)

    prox_queue = Queue()
    md = None
    workers = []
    try:
        md = __import__(mod)
        md.biz_flag = biz_flag
        if urls:
            for attr, url in urls.items():
                # setattr instead of exec(): no source text is built from
                # configuration values, so quotes/backslashes in a URL can
                # neither break the assignment nor inject code.
                setattr(md, attr, url)
            print(urls)
        workers = [ExecThread(md, prox_queue) for _ in range(thread_count)]
        for worker in workers:
            # assumes ExecThread subclasses threading.Thread — TODO confirm
            worker.daemon = True
            worker.start()
    except Exception:
        traceback.print_exc()
        # Without a module or any worker the supervision loop below cannot
        # work; surface the real cause instead of failing later on an
        # unrelated NameError/AttributeError.  A partial start-up (workers
        # created, one start() failed) is still recovered by the loop.
        if md is None or not workers:
            raise

    s_time = time.time()
    while True:
        # Refill the proxy-IP queue.
        push_proxy_queue(prox_queue, md.ille_proxy_ip)

        if exit_flag.is_set():
            logger.info("%s exit..." % title)
            datamodel.g_exit = True
            for worker in workers:
                worker.stop()
                worker.join()
            break

        # Replace every worker that has died (or never started).
        for idx, worker in enumerate(workers):
            # is_alive() replaces isAlive(), which was removed in Python 3.9.
            if not worker.is_alive():
                replacement = ExecThread(md, prox_queue)
                workers[idx] = replacement
                replacement.daemon = True
                replacement.start()

        # Periodic status report: S/N/F are the module's Ok/Null/False counters.
        if int(time.time()) - s_time > REPORT_TIME:
            logger.info("%s %s S:%s N:%s F:%s" % (mod, biz_flag, md.Ok_num, md.Null_num, md.False_num))
            s_time = time.time()
        time.sleep(3)
def init(self):
    """Validate the configuration, register the databases and load keywords.

    Reads ``self._input``, ``self._kwsource``, ``self._keywords``,
    ``self._dbsource`` and ``self._output``.  Every configured server is
    looked up in the hash returned by ``get_conn_hash()``, its ``info`` dict
    gets the configured ``dbname`` and is registered with ``mysqlwrap``.

    Keywords come from ``self._keywords`` when it is non-empty (the module
    global ``KWORDS`` is then rebound to the same list); otherwise they are
    queried from the keyword source, appending each non-empty ``k_word`` to
    ``self.keywords`` and storing its ``k_weight`` in ``self.keyweight``.

    Raises:
        Exception: when the input, keyword, dbsource or output configuration
            is missing, or when the keyword query fails or returns no rows.
    """
    global KWORDS

    if self._input is None:
        raise Exception("not set input config!")
    # An empty explicit keyword list falls through to the keyword source
    # below, so it only counts as "set" when a source is configured too.
    if self._kwsource is None and not self._keywords:
        raise Exception("not set keywords!")
    if self._dbsource is None:
        raise Exception("not set dbsource!")
    # NOTE(review): _dbsource is validated but not used in this method, while
    # _output is used; confirm whether both checks are intended.
    if self._output is None:
        raise Exception("not set output config!")

    # Collect (server, dbname) pairs in registration order.
    targets = [(self._input['dbserver'], self._input['data']['dbname'])]
    if self._kwsource is not None:
        # The keyword source is optional when explicit keywords are given;
        # subscripting None here used to raise a TypeError.
        targets.append((self._kwsource['dbserver'],
                        self._kwsource['data']['dbname']))
    targets.append((self._output['dbserver'], self._output['dbname']))

    # Register the database connections.
    conn_rd = get_conn_hash()
    for server, dbname in targets:
        info = conn_rd.get(server)['info']
        info['dbname'] = dbname
        mysqlwrap.setup_db(server, info)
    mysqlwrap.pool_monitor()

    # Build the keyword list.
    if self._keywords:
        self.keywords = self._keywords
        KWORDS = self.keywords
        return

    db = mysqlwrap.get_db(self._kwsource['dbserver'])
    res, desc = db.query(self._kwsource['data'])
    if res == -1 or not desc:
        print(res, desc)
        raise Exception("no keywords!")
    for row in desc:
        word = row['k_word']
        if word:
            self.keywords.append(word)
            self.keyweight[word] = row['k_weight']
i+=1 if res and 'name' in res: print("total_num:%s" % i,id) print(updata(res,'ah')) else: print(res) if __name__=="__main__": #dbinfo ={'host':'192.168.10.126','port':3306,'dbname':'gov_corp', # 'user':'******','passwd':'wbsp','charset':'utf8'} #check_exist_corp('js') config.read('./conf/worker.conf') mysqlwrap.setup_db('default',config.CONFIG['mysqld']) THREAD_COUNT = 10 import sys sys.path.append(sys.path[0]+'/modules') import anhui #rediswrap.setup_redis('default','192.168.10.126',6380) wk = Thread(target=doworke) wk.start() while True: time.sleep(3)
'url_check':'http://gxqyxygs.gov.cn/checkCheckNo.jspx',
            'url_list':'http://gxqyxygs.gov.cn/searchList.jspx'}
        },
    # NOTE(review): these entries close a mapping whose opening is outside
    # this view — presumably the SF_DIST dict iterated below; confirm.
    # Each entry names the handling module ('mod'), a worker count
    # ('thread_count') and the site's endpoints ('urls').  'url_code' keeps a
    # literal %s placeholder that is not filled in here.
    'ha':{'mod':'anhui','thread_count':50,
          'urls':{'host':'http://222.143.24.157',
            'url_home':'http://222.143.24.157/search.jspx',
            'url_code':'http://222.143.24.157/validateCode.jspx?type=1&id=%s',
            'url_check':'http://222.143.24.157/checkCheckNo.jspx',
            'url_list':'http://222.143.24.157/searchList.jspx'}
        },
    'hl':{'mod':'anhui','thread_count':50,
          'urls':{'host':'http://gsxt.hljaic.gov.cn',
            'url_home':'http://gsxt.hljaic.gov.cn/search.jspx',
            'url_code':'http://gsxt.hljaic.gov.cn/validateCode.jspx?type=1&id=%s',
            'url_check':'http://gsxt.hljaic.gov.cn/checkCheckNo.jspx',
            'url_list':'http://gsxt.hljaic.gov.cn/searchList.jspx'}
        },
    }

# Script entry point: registers the 'default' MySQL and Redis connections and
# runs one Data_tb statement per SF_DIST key.
if __name__=="__main__":
    # NOTE(review): host, user and password are hard-coded in this literal.
    dbinfo ={'host':'192.168.10.126','port':3306,'dbname':'gov_corp',
            'user':'******','passwd':'wbsp','charset':'utf8'}
    mysqlwrap.setup_db('default',dbinfo)
    mysqlwrap.get_db().connect()
    rediswrap.setup_redis('default','192.168.10.126',6380)
    for p in SF_DIST.keys():
        # Data_tb is a %-format template; the key is substituted into it and
        # the resulting SQL is executed.  The meaning of the second argument
        # (1) is defined by mysqlwrap — TODO confirm.
        sql = Data_tb % p
        mysqlwrap.get_db().query(sql,1)
    #print(get_proxy())
if f[:4] == "biz_" and f[-5:] == ".json": sconf.BIZ[f[4:-5]] = json.loads(open("./conf/%s" % f).read().replace("\n", "").replace("\t", "")) except: logger.error("config file %s not load.please check." % f) # 加载zk zkwrap.setup("default", sconf.SYS["zk"]) # 加载数据库 mysqlwrap.setup_db("default", sconf.SYS["mysql"]) mysqlwrap.pool_monitor() # rediswrap.setup_redis('default',sconf.SYS['redis']['host'],sconf.SYS['redis']['port']) rediswrap.setup_redis("cache", sconf.SYS["redis"]["host"], sconf.SYS["redis"]["port"], decode_flag=False) sconf.SYS["root_dir"] = sys.path[0] G_exit = False mpexit = mp.Event() # 启动定时任务
#print(sconf.HOST) #print(sconf.DATA_SOURC) except: pass #加载业务配置文件 files = os.listdir("./conf") for f in files: try: if f[:4]=='biz_' and f[-5:]=='.json': sconf.BIZ[f[4:-5]] = json.loads(open("./conf/%s"%f).read().replace('\n','').replace('\t','')) except: logger.error("config file %s not load.please check."%f) #加载zk zkwrap.setup('default',sconf.SYS['zk']) #加载数据库 mysqlwrap.setup_db('default',sconf.SYS['mysql']) mysqlwrap.pool_monitor() rediswrap.setup_redis('default',sconf.SYS['redis']['host'],sconf.SYS['redis']['port']) rediswrap.setup_redis('cache',sconf.SYS['redis']['host'],sconf.SYS['redis']['port']) sconf.SYS['root_dir'] = sys.path[0] G_exit = False mpexit = mp.Event() #启动定时任务 #from stat_daemon import cron #p = mp.Process(target=cron, args=(mpexit,)) #p.daemon = True #p.start()