def run():
    """Start the subscribe server.

    Spins up three worker pools (grab pages, parse pages, send mail),
    queues one task per worker, then joins and destroys every pool.
    """
    logging.info('start subscribe server.....')
    pools = []
    # (pool size, worker callback) for each stage of the pipeline.
    for worker_count, worker in ((GRAB_NUM, do_grab),
                                 (PASER_NUM, do_paser),
                                 (MAIL_SENDER_NUM, do_send)):
        stage_pool = ThreadPool(worker_count)
        for index in range(worker_count):
            stage_pool.add_task(worker, None, id=index + 1)
        pools.append(stage_pool)
    # Join and destroy all threads, in creation order.
    for stage_pool in pools:
        stage_pool.destroy()
# set/get print r.set("key", 'b' * 56000) print len(r.get("key")) # incr print r.set("incr_key", 0) print r.get("incr_key") print r.incr('incr_key') print r.get("incr_key") def press_test(): r = redis.StrictRedis(host='localhost', port=8323) for i in range(10000): key = 'foo_%d' % i r.set(key, 'b' * i) if i % 1000 == 0: print key, "->", len(r.get(key)) if __name__ == "__main__": #functional_test() # Create thread pool with nums threads pool = ThreadPool(32) # Add a task into pool for n in range(10): pool.add_task(functional_test) pool.add_task(press_test) # Join and destroy all threads pool.destroy()
    # NOTE(review): this chunk is the interior of an enclosing per-review loop
    # (it uses `continue` and free names restUrl/reviewName/pool defined above
    # this view); the trailing pool.destroy() runs after that loop ends.
    # Retry fetching the image URL list up to 9 times, sleeping 10s between tries.
    for i in range(1, 10):
        try:
            imageUrls = get_image_url(restUrl)
            break
        except Exception, e:
            print 'get restUrl error times' + str(i) + ': %s' % (e,)
            logging.error('get restUrl error times' + str(i) + ': %s' % (e,))
            time.sleep(10)
    # Skip this review entirely if no image URLs could be obtained.
    if imageUrls is None or len(imageUrls) == 0:
        print 'get imageUrls error %s' % restUrl
        logging.error('get imageUrls error %s' % restUrl)
        continue
    # logging.debug("progress: %d of %d, %s , %d images", progress, total, reviewName, len(urls))  # progress
    count = 0
    for imageUrl in imageUrls:
        # download_image(imageUrl, LOCAL_DIR + reviewName,
        #                imageUrl[imageUrl.rfind("/") + 1:] + ".jpg")
        # Queue each image for multi-threaded download; the file name is the
        # last path component of the URL.
        pool.add_task(download_image, imageUrl, LOCAL_DIR + reviewName,
                      imageUrl[imageUrl.rfind("/") + 1:], SLEEP_SECONDS)
        count += 1
        # logging.debug("task added: %d", count)
    # logging.debug("finished : %s", reviewName)
    # print "finished : %s" % ( reviewName)
    #logging.info("finished : %s" % ( reviewName))
# Join and destroy all download threads once every review has been queued.
pool.destroy()
def main_fresh(dbOrNot):
    """Monitor URLs using fresh data.

    Loads the previous criterion and access-error files, monitors every URL
    listed in ./.urgentURLS on a thread pool, mails summaries of access
    errors and updated sites, then rewrites the criterion and access-error
    files.

    dbOrNot -- intended to control whether the criterion is also pushed to
               the database; see the NOTE below, it is currently forced off.
    """
    # Load the previous criterion into oldUrlObjDic.
    # File format: one "url,length,md5" record per line; reading stops at the
    # first blank line (preserves the original readline/break semantics).
    with open("./urgentCriterion_new") as f:
        for line in f:
            line = line.strip()
            if not line:
                break
            arr = line.split(",")
            # URL Object Format: URL(length, md5)
            oldUrlObjDic[arr[0]] = URL(int(arr[1]), arr[2])

    # Load URLs that failed with an access error on a previous run.
    with open("./urgentAccessErrorURLs") as f:
        for line in f:
            line = line.strip()
            if not line:
                break
            aeURLs.append(line)

    tp = ThreadPool(THREADS_NUM)
    urlCount = 0
    # Monitor each url in the .urgentURLS file: one pool task per URL.
    with open("./.urgentURLS") as f:
        for line in f:
            url = line.strip()
            if not url:
                break
            tp.add_task(monitor, url)
            urlCount += 1
    tp.destroy()  # join all worker threads before summarising results

    # Mail a summary of access errors, if any (user-facing Chinese text).
    if aeCount > 0:
        allContent = "本次共监测网站{0}个, 其中有{1}个网站访问异常, 详细信息如下:\n\n{2}".format(urlCount, aeCount, aeContent)
        urgentMyUtils.sendEmail(aeSubject, allContent)
    # Mail a summary of updated sites, if any.
    if uwCount > 0:
        allContent = "本次共监测网站{0}个, 其中有{1}个网站监测到有更新, 详细信息如下:\n\n{2}".format(urlCount, uwCount, uwContent)
        urgentMyUtils.sendEmail(uwSubject, allContent)

    # Update Criterion file with the freshly collected data.
    with open("./urgentCriterion_new", "w") as f:
        for url in newUrlObjDic.keys():
            f.write("{0},{1},{2}\n".format(url, newUrlObjDic[url].length,
                                           newUrlObjDic[url].getMD5Str()))

    # NOTE(review): the parameter is unconditionally overridden here, so the
    # database update below can never run -- confirm whether this is intended
    # before removing the override.
    dbOrNot = False
    if dbOrNot:
        # Update criterion in database.
        urgentMyUtils.updateCriterion(newUrlObjDic)

    # Update accessErrorURLs file.
    with open("./urgentAccessErrorURLs", "w") as f:
        for url in aeURLs:
            f.write(url + "\n")
class Tasks(Codes):
    """QQ-bot task dispatcher.

    Wraps admin/group bookkeeping (via Operate), uin-to-QQ resolution,
    message analysis and smart replies; all outgoing HTTP posts are queued
    on a small internal thread pool.
    """

    def __init__(self):
        self.operate = Operate()
        self._api = OpenApi()
        self._http = HttpClient.getInstance()
        self._pool = ThreadPool(5)  # initialise a pool of 5 worker threads
        print("Task Class 初始化完毕")

    def getAllAdmin(self):
        # Return the list of all administrators.
        print("所有管理员: %s", variable.Admins)
        return variable.Admins

    def getAllGroup(self):
        # Return the list of all watched groups.
        print("所有关注群: %s", variable.Groups)
        return variable.Groups

    def addAdmin(self, qq):
        return self.operate.addAdmin(qq)

    def delAdmin(self, qq):
        return self.operate.delAdmin(qq)

    def isAdmin(self, qq):
        return self.operate.isAdmin(qq)

    def addGroup(self, qq):
        return self.operate.addGroup(qq)

    def delGroup(self, qq):
        return self.operate.delGroup(qq)

    def inGroup(self, qq):
        # print("inGroup: %s", qq)
        return self.operate.inGroup(qq)

    def addAsk(self, question, answer):
        return self.operate.addAsk(question, answer)

    def delAsk(self, Id):
        return self.operate.delAsk(Id)

    def getAsk(self, content):
        return self.operate.getAsk(content)

    def end(self):
        # Join and tear down the worker pool.
        self._pool.destroy()

    def uin_to_qq(self, uin):
        """Resolve a uin to its QQ number, caching hits in variable.UsersQQ.

        Returns the QQ number as a string, or "" on any failure.
        """
        if uin in variable.UsersQQ:
            return variable.UsersQQ.get(uin)
        print("获取qq %s %s %s", uin, variable.Vfwebqq, variable.Referer)
        html = self._http.get(variable.Get_friend_uin2.format(uin, self.bytesToStr(variable.Vfwebqq)), referer = variable.Referer)
        print("uin_to_qq: %s", html)
        try:
            result = json.loads(self.bytesToStr(html))
            if result.get("retcode") != 0:
                return ""
            qq = result.get("result").get("account")
            if qq:
                variable.UsersQQ[uin] = str(qq)
                return str(qq)
        except Exception as e:
            print(e)
        return ""

    def sendMsg(self, *args, **kwargs):
        """Send one reply: POST kwargs' url/data/referer via the HTTP client."""
        print("回复消息")
        url = kwargs.get("url")
        data = kwargs.get("data")
        # print(data)
        referer = kwargs.get("referer")
        result = self._http.post(url = url, data = data, referer = referer)
        print("回复结果: %s", result)

    def otherMsg(self, content, to, url, uin):
        # Ask the chat-robot service for a smart reply and, if it produced
        # one, queue it for sending on the pool.
        if content:
            html = self._http.get(url = variable.RobotUrl.format(quote(content), uin))
            html = html.replace("\\n", "").replace("\n", "")
            html = self._api.parse(html)
            html = self._api.getResult()
            if html:
                print("智能回复: ", html)
                data = {'r' : variable.Msg_Data.format(to, uin, html, variable.Clientid, variable.Msgid, variable.Psessionid)}
                print(data)
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)

    def analyze(self, qq, uin, content, iseq = None):
        """Parse one incoming message and dispatch the appropriate reply.

        qq      -- sender's QQ number (may be "")
        uin     -- reply target uin
        content -- message text
        iseq    -- group number when the message came from a group, else None
        """
        print("开始解析消息")
        if iseq:
            # Message came from a group.
            print("消息来自群")
            to = "group_uin"
            url = variable.Send_qun_msg2
        else:
            # Message came from a friend (private chat).
            print("消息来自好友")
            to = "to"
            url = variable.Send_buddy_msg2
        # Administrator control commands (enable / disable / exit the bot).
        if self.isAdmin(qq) and content in ("开启机器人", "关闭机器人", "退出"):
            _msg = ""
            print("是管理员消息")
            if content == "开启机器人":
                variable.State = True
                print("机器人已开启")
                _msg = "机器人已开启"
            elif content == "关闭机器人":
                variable.State = False
                print("机器人已关闭")
                _msg = "机器人已关闭"
            elif content == "退出":
                variable.State = False
                variable.Exit = True
                print("机器人已退出")
                _msg = "机器人已退出"
            if _msg:
                data = {'r' : variable.Msg_Data.format(to, uin, _msg, variable.Clientid, variable.Msgid, variable.Psessionid)}
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)
            return
        # Private command messages sent directly to the bot (add/remove
        # admins and watched groups).
        result = variable.Command.findall(content)
        if result and to == "to":
            ver, msg = result[0]
            _msg = ""
            if ver == variable.AddAdmin:
                # Add an administrator.
                print("添加管理员")
                if self.addAdmin(msg):
                    _msg = "添加管理员: " + msg + " 成功"
                else:
                    _msg = "添加管理员: " + msg + " 失败"
            elif ver == variable.DelAdmin:
                # Remove an administrator.
                print("删除管理员")
                if self.delAdmin(msg):
                    _msg = "删除管理员: " + msg + " 成功"
                else:
                    _msg = "删除管理员: " + msg + " 失败"
            elif ver == variable.AddAttention:
                # Add a watched group number.
                print("添加关注群")
                if self.addGroup(msg):
                    _msg = "添加关注群: " + msg + " 成功"
                else:
                    _msg = "添加关注群: " + msg + " 失败"
            elif ver == variable.DelAttention:
                # Remove a watched group number.
                print("删除关注群号")
                if self.delGroup(msg):
                    _msg = "删除关注群: " + msg + " 成功"
                else:
                    _msg = "删除关注群: " + msg + " 失败"
            if _msg:
                data = {'r' : variable.Msg_Data.format(to, uin, _msg, variable.Clientid, variable.Msgid, variable.Psessionid)}
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)
            return
        # Message starting with "#", longer than 2 chars, sent in a watched
        # group: strip the "#", count banned words, then either forward to the
        # smart-reply handler or scold the sender.
        if content.startswith("#") and len(content) > 2 and self.inGroup(iseq):
            i = 0
            content = content[1:].strip()
            for w in content:
                if w in variable.Filter:
                    # Found a filtered word.
                    print("发现过滤词: ", w)
                    i += 1
            if i == 0:
                self._pool.add_task(callback = self.otherMsg, content = content, to = to, url = url, uin = uin)
                return
            else:
                data = {'r' : variable.Msg_Data.format(to, uin, "你想干什么!f**k", variable.Clientid, variable.Msgid, variable.Psessionid)}
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)
                return
        # Plain private message: forward to the smart-reply handler.
        if to == "to" and len(content) > 2:
            self._pool.add_task(callback = self.otherMsg, content = content.strip(), to = to, url = url, uin = uin)
            return

    def delwith(self, fuin, suin, iseq, content):
        '''
        Entry point for one incoming message.

        #fuin    message sender uin (friend or group)
        #suin    group-message sender's uin
        #iseq    group number
        #content message body
        '''
        print("%s %s %s %s", fuin, suin, iseq, content)
        # Group shared-file notification arrives as an XML payload.
        if content.startswith("<?xml"):
            print("发现共享文件")
        # Resolve the sender's qq: inside a group use the member uin,
        # otherwise the friend uin.
        if iseq and suin:
            qq = self.uin_to_qq(suin)
        else:
            qq = self.uin_to_qq(fuin)
        print("qq: %s", qq)
        self.analyze(qq, fuin, content, iseq)
def lagouScrapy(self):
    """Queue 200 lagou scrape tasks (page indices 0-199) on an 18-thread pool,
    then join and destroy the pool."""
    tp = ThreadPool(18)
    for page in range(200):
        # One task per page index.
        tp.add_task(self.lagou, page)
        # (The original also incremented the loop variable here; that had no
        # effect because "for" rebinds it each iteration, so it was removed.)
    tp.destroy()  # join all workers and tear down the pool
def start(baseUrl, seedUrl):
    """Crawl from seedUrl and run security checks on every crawled URL that
    carries a query string; results are written to a per-run log file.

    NOTE(review): the trailing ''' opens a string/comment block that continues
    past this chunk.
    """
    # Clean reffer in reffer.txt (truncate the file).
    f = open("reffer.txt", "w")
    f.close()
    #seed = Request(base='http://192.168.42.131/dvwa/index.php',url='http://192.168.42.131/dvwa/index.php',method='get')
    seed = request.Request(base=baseUrl, url=seedUrl, timeout=config.conf['connTimeout'], query={}, method='get')
    #seed = request.Request(base='http://192.168.42.132/dvwa/',url='http://192.168.42.132/dvwa/',query={},method='get')
    # Announce the seed URL.
    colors.blue('种子URL: %s\n' % seed._url)
    logfileName = create_logfile(seed._url)
    cookie = getCookie(seed._url)
    # Begin crawler.
    tup = urlparse.urlparse(seed._url)
    netloc = tup.netloc
    # Seed url; count = number of URLs actually submitted for checking.
    count = 0
    q = Queue.Queue()
    # Bloom filter for URL de-duplication (0.1% false-positive, 100k capacity).
    bf = bloomFilter.BloomFilter(0.001, 100000)
    # Read reffer from reffer.txt (disabled).
    '''
    reffer = readReffer()
    reqSet = []
    reqSet.append(seed)
    reqSet.extend(reffer)
    for i in reqSet:
        q.put(i)
        bf.insert(i._url)
    '''
    q.put(seed)
    bf.insert(seed._url)
    nums = config.conf['MaxThread']
    pool = ThreadPool(nums)
    begin = time.time()
    # BFS crawl: pop a request, crawl it, queue query-bearing URLs for
    # checking, and enqueue newly discovered links not yet seen.
    while (not q.empty()):
        req = q.get()
        req._cookies = cookie
        reqs = crawler.crawl(req, tree)
        if req._query != {} and is_tree_full(req._url, tree):
            #if req._query != {}:
            count += 1
            print 'URL: ', req._BFUrl, ' ', req._source
            pool.add_task(startCheck, req, logfileName)
        for x in reqs:
            if not bf.exist(x._BFUrl):
                bf.insert(x._BFUrl)
                q.put(x)
    pool.destroy()
    end = time.time()
    # Print the scan results collected in the log file.
    f = open(logfileName, 'r')
    colors.blue('\n扫描结果:\n\n')
    x = f.read()
    colors.green(x)
    colors.blue('\n扫描结果已保存在 "%s"\n\n' % (os.getcwd() + '/' + logfileName) + ' 中')
    cost = end - begin
    print "耗时:%f秒" % cost
    print "进行测试的URL数量:", count
    f.close()
    # Append the advice section to the log file.
    f = open(logfileName, 'a')
    f.write(advice())
    f.close()
    # Kill the helper proxy.py process.
    os.system('ps -ef | grep -v grep | grep proxy.py | awk \'{print $2}\'|xargs kill -9')
'''