def __init__(self, pool, maxHostID, monitorInterval=2):
    self._messageTypes = {}
    # Save arguments
    self._stop = False
    self._stopped = False
    self._poolID = str(pool.spUUID)
    self._spmStorageDir = pool.storage_repository
    tpSize = config.getint('irs', 'thread_pool_size') / 2
    waitTimeout = 3
    maxTasks = config.getint('irs', 'max_tasks')
    self.tp = ThreadPool(tpSize, waitTimeout, maxTasks)
    # *** IMPORTANT NOTE: The SPM's inbox is the HSMs' outbox and vice
    # versa ***
    self._inbox = os.path.join(self._spmStorageDir, self._poolID,
                               "mastersd", sd.DOMAIN_META_DATA, "inbox")
    if not os.path.exists(self._inbox):
        self.log.error("SPM_MailMonitor create failed - inbox %s does not "
                       "exist" % repr(self._inbox))
        raise RuntimeError("SPM_MailMonitor create failed - inbox %s does "
                           "not exist" % repr(self._inbox))
    self._outbox = os.path.join(self._spmStorageDir, self._poolID,
                                "mastersd", sd.DOMAIN_META_DATA, "outbox")
    if not os.path.exists(self._outbox):
        self.log.error("SPM_MailMonitor create failed - outbox %s does "
                       "not exist" % repr(self._outbox))
        raise RuntimeError("SPM_MailMonitor create failed - outbox %s "
                           "does not exist" % repr(self._outbox))
    self._numHosts = int(maxHostID)
    self._outMailLen = MAILBOX_SIZE * self._numHosts
    self._monitorInterval = monitorInterval
    # TODO: add support for multiple paths (multiple mailboxes)
    self._outgoingMail = self._outMailLen * "\0"
    self._incomingMail = self._outgoingMail
    self._inCmd = ['dd',
                   'if=' + str(self._inbox),
                   'iflag=direct,fullblock',
                   'count=1']
    self._outCmd = ['dd',
                    'of=' + str(self._outbox),
                    'oflag=direct',
                    'iflag=fullblock',
                    'conv=notrunc',
                    'count=1']
    self._outLock = threading.Lock()
    self._inLock = threading.Lock()
    # Clear outgoing mail
    self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: "
                   "%s", self._outCmd)
    cmd = self._outCmd + ['bs=' + str(self._outMailLen)]
    (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail)
    if rc:
        self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, "
                         "dd failed")
    t = concurrent.thread(self.run, name="mailbox.SPMMonitor",
                          logger=self.log.name)
    t.start()
    self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)
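# A minimal sketch of the _mboxExecCmd helper the constructor above relies
# on: run the dd command, feed `data` to its stdin, and return
# (rc, out, err). The subprocess-based body is an assumption for
# illustration, not the original vdsm implementation.
import subprocess

def _mboxExecCmd(cmd, data=None):
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate(data)
    return p.returncode, out, err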
def __init__(self, args=Strategy()):
    self.url = args.url
    self.max_depth = args.max_depth  # maximum crawl depth
    self.max_count = args.max_count  # maximum number of pages to crawl
    self.concurrency = args.concurrency  # number of threads
    self.timeout = args.timeout  # timeout
    self.cookies = args.cookies  # cookies
    self.ssl_verify = args.ssl_verify  # ssl
    self.same_host = args.same_host  # only follow links on the same host
    self.same_domain = args.same_domain  # only follow links on the same domain
    self.currentDepth = 1  # initial crawl depth, starting from 1
    self.keyword = args.keyword  # keyword, decoded with the console's default encoding
    self.threadPool = ThreadPool(args.concurrency)  # thread pool with the given thread count
    self.visitedHrefs = set()  # links already visited
    self.unvisitedHrefs = deque()  # links waiting to be visited
    self.unvisitedHrefs.append(args.url)  # add the first link to visit
    self.isCrawling = False  # whether the crawler has started working
    self.file = BASEDIR + '/cache/crawler/' + genFilename(self.url) + '.txt'
    # FIXME (original author's note): this line is problematic
    self.database = Database(args.dbFile)  # database
    self.lock = Lock()
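# A minimal sketch of the genFilename helper assumed above: derive a
# filesystem-safe cache file name from the URL. The exact scheme is an
# assumption, not the project's own helper.
import re

def genFilename(url):
    # Collapse every run of characters that is unsafe in a file name
    return re.sub(r'[^0-9A-Za-z]+', '_', url).strip('_')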
def saveProxies(self):
    # Create 30 threads and start them
    threadPool = ThreadPool(30)
    threadPool.startThreads()
    # Read the proxy records from the database
    proxyip = self.proxyip_db.readData()
    # Queue a check task for every proxy
    for proxy in proxyip:
        threadPool.putTask(self.checkclientUrl, proxy[0])
    # Collect results: working proxies are marked available, locked ones
    # are marked unavailable, failed ones are deleted
    ip_fail = 0
    ip_ok = 0
    ip_lock = 0
    while threadPool.getTaskLeft():
        flag, proxy = threadPool.getTaskResult()
        print flag, proxy
        if flag == 'ok':
            self.proxyip_db.updateData(1, proxy)
            ip_ok += 1
        elif flag == 'lock':
            self.proxyip_db.updateData(0, proxy)
            ip_lock += 1
        else:
            self.proxyip_db.delData(proxy)
            ip_fail += 1
    print '====> available ip: ', ip_ok, ' , lock ip: ', ip_lock, ' , fail ip: ', ip_fail, ' <===='
    threadPool.stopThreads()
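# A minimal sketch of the checkclientUrl task queued above. The result loop
# expects each task to return a (flag, proxy) tuple with flag one of 'ok',
# 'lock', or 'fail'; the test URL, the timeout, and the rule for deciding
# 'lock' are all assumptions.
import requests

def checkclientUrl(proxy):
    proxies = {'http': 'http://' + proxy, 'https': 'http://' + proxy}
    try:
        r = requests.get('http://httpbin.org/ip', proxies=proxies, timeout=5)
        if r.status_code == 200:
            return 'ok', proxy
        if r.status_code == 403:
            return 'lock', proxy  # assumed: the target rejects this proxy
    except requests.RequestException:
        pass
    return 'fail', proxy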
def __init__(self, inbox, outbox, hostID, queue, monitorInterval):
    # Save arguments
    tpSize = config.getint('irs', 'thread_pool_size') / 2
    waitTimeout = 3
    maxTasks = config.getint('irs', 'max_tasks')
    self.tp = ThreadPool(tpSize, waitTimeout, maxTasks)
    self._stop = False
    self._flush = False
    self._queue = queue
    self._activeMessages = {}
    self._monitorInterval = monitorInterval
    self._hostID = int(hostID)
    self._used_slots_array = [0] * MESSAGES_PER_MAILBOX
    self._outgoingMail = EMPTYMAILBOX
    self._incomingMail = EMPTYMAILBOX
    # TODO: add support for multiple paths (multiple mailboxes)
    self._spmStorageDir = config.get('irs', 'repository')
    self._inCmd = [constants.EXT_DD,
                   'if=' + str(inbox),
                   'iflag=direct,fullblock',
                   'bs=' + str(BLOCK_SIZE),
                   'count=' + str(BLOCKS_PER_MAILBOX),
                   'skip=' + str(self._hostID * BLOCKS_PER_MAILBOX)]
    self._outCmd = [constants.EXT_DD,
                    'of=' + str(outbox),
                    'iflag=fullblock',
                    'oflag=direct',
                    'conv=notrunc',
                    'bs=' + str(BLOCK_SIZE),
                    'seek=' + str(self._hostID * BLOCKS_PER_MAILBOX)]
    self._init = False
    self._initMailbox()  # Read initial mailbox state
    self._msgCounter = 0
    self._sendMail()  # Clear outgoing mailbox
    self._thread = concurrent.thread(self.run, name="mailbox/hsm",
                                     logger=self.log.name)
    self._thread.start()
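# For illustration: each host owns a fixed slice of the shared inbox and
# outbox volumes, so dd's skip/seek above (in blocks) jump straight to host
# N's slice. The constant values below are assumptions for the example,
# not vdsm's actual constants.
BLOCK_SIZE = 512
BLOCKS_PER_MAILBOX = 8
MAILBOX_SIZE = BLOCK_SIZE * BLOCKS_PER_MAILBOX

def mailbox_byte_offset(host_id):
    # dd works in block units (skip/seek = host_id * BLOCKS_PER_MAILBOX);
    # expressed in bytes that is:
    return host_id * MAILBOX_SIZE

assert mailbox_byte_offset(3) == 3 * 8 * 512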
def __init__(self, ip, port, initial_nodes, period):
    super(DiscoveryService, self).__init__(name='Discovery', ip=ip, port=port)
    self.period = period
    self.nodes = initial_nodes
    self.threadpool = ThreadPool(2)
def __init__(self, url, depth, threadNum, dbfile, key):
    # Queue of URLs to fetch
    self.urlQueue = Queue()
    # Queue of fetched HTML documents
    self.htmlQueue = Queue()
    # URLs already visited
    self.readUrls = []
    # Links not yet visited
    self.links = []
    # Number of threads
    self.threadNum = threadNum
    # Database file name
    self.dbfile = dbfile
    # Object that persists results to the database
    self.dataBase = SaveDataBase(self.dbfile)
    # Thread pool with the given thread count
    self.threadPool = ThreadPool(self.threadNum)
    # Seed the URL queue
    self.urlQueue.put(url)
    # Keyword, decoded with the console's default encoding
    self.key = key.decode(getdefaultlocale()[1])
    # Crawl depth limit
    self.depth = depth
    # Current crawl depth
    self.currentDepth = 1
    # Whether the crawler is currently running
    self.state = False
def main():
    try:
        f = open(r'ip.txt', 'rb')
        ip = ''
        for line in f.readlines():
            final_ip = line.strip('\n')
            for i in get_ip_list(final_ip):
                print i
                ip += str(i).strip() + '\n'
        with open(r'scan_ip.txt', 'w') as ff:
            ff.write(ip)
        data = []
        items = portscan()  # run masscan to discover open ports
        dataList = {}
        for i in items:
            i = i.split('|')
            if i[1] not in dataList:
                dataList[str(i[1])] = []
            dataList[str(i[1])].append(i[0])
        for i in dataList:
            if len(dataList[i]) >= 50:
                for port in dataList[i]:
                    # Drop hosts exposing 50 or more open ports
                    items.remove(str(port) + '|' + str(i))
        pool = ThreadPool(20, 1000)
        pool.start(NmapScan, items, data)
    except Exception as e:
        print e
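# A minimal sketch of the get_ip_list helper called above, expanding a line
# such as "10.0.0.0/24" into individual addresses; using the standard
# ipaddress module (Python 3) is an assumption, not the original helper.
import ipaddress

def get_ip_list(target):
    if '/' in target:
        # Expand a CIDR into its host addresses
        return [str(h) for h in ipaddress.ip_network(target, strict=False).hosts()]
    return [target]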
def Principal():
    thread = ThreadPool(10)
    while True:
        cnx, end = s.accept()
        print "the following address connected: " + end[0]
        clientes.append(cnx)
        thread.insert_job(novo_cliente, cnx, end)
def __init__(self,
             tpSize=config.getint('irs', 'thread_pool_size'),
             waitTimeout=3,
             maxTasks=config.getint('irs', 'max_tasks')):
    # getint, not getfloat: the pool expects integer sizing parameters
    self.storage_repository = config.get('irs', 'repository')
    self.tp = ThreadPool(tpSize, waitTimeout, maxTasks)
    self._tasks = {}
    self._unqueuedTasks = []
def testThreadPool(self):
    allTheThreads = []
    with ThreadPool( 10 ) as tp:
        for i in range(200):
            w = MockWorker( None, None, None, None, f"Thread {i}" )
            allTheThreads.append( w )
            tp.addWorker( w )
    for thread in allTheThreads:
        self.assertFalse( thread.is_alive() )
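# A minimal sketch of the context-manager pool the test above exercises,
# assuming workers are threading.Thread objects: addWorker starts workers
# (crudely throttled to the pool size) and __exit__ joins them all, which
# is why no worker is alive once the with-block ends. An assumed
# implementation, not the project's own ThreadPool.
import threading

class ThreadPool:
    def __init__(self, max_jobs):
        self.max_jobs = max_jobs
        self.workers = []

    def __enter__(self):
        return self

    def addWorker(self, worker):
        # Crude throttle: when at capacity, wait for one running worker
        alive = [w for w in self.workers if w.is_alive()]
        if len(alive) >= self.max_jobs:
            alive[0].join()
        worker.start()
        self.workers.append(worker)

    def __exit__(self, exc_type, exc_val, exc_tb):
        for w in self.workers:
            w.join()
        return False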
def __init__(self, args):
    self.depth = args.depth
    self.currentDepth = 1
    self.database = database(args.dbFile)
    self.threadPool = ThreadPool(args.threadNum)
    self.visitUrls = set()
    self.unvisitedUrls = deque()
    self.unvisitedUrls.append(args.url)
    self.isCrawling = False
    self.maxWebPages = args.maxWebPages
def __init__(self, ip, port, nodes, timeout, directory, file_transfer_service):
    super(CheckFileService, self).__init__(name='CheckFile', ip=ip, port=port)
    self.nodes = nodes
    self.directory = directory
    self.client_socket.settimeout(timeout)
    self.threadpool = ThreadPool(2)
    self.file_transfer_service = file_transfer_service
def __init__(self, args):
    self.depth = args.depth
    self.currentDepth = 1
    self.keyword = args.keyword.decode(getdefaultlocale()[1])
    self.database = Database(args.dbFile)
    self.threadPool = ThreadPool(args.threadNum)
    self.visitedHrefs = set()
    self.unvisitedHrefs = deque()
    self.unvisitedHrefs.append(args.url)
    self.isCrawling = False
def clientThreadMain():
    # Create 20 pre-allocated threads
    thread = ThreadPool(20)
    # Main server loop
    while True:
        conexao, endereco = server.accept()
        print endereco[0] + " connected!"
        # When a client connects, it is added to the client list
        # (used for the broadcast)
        clientes.append(conexao)
        thread.insert_job(newClient, conexao, endereco)
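# A minimal sketch of the insert_job pool the two socket servers above use:
# a fixed set of daemon threads consuming jobs from a shared queue. An
# assumed implementation, not the original ThreadPool.
import queue
import threading

class ThreadPool:
    def __init__(self, num_threads):
        self._jobs = queue.Queue()
        for _ in range(num_threads):
            threading.Thread(target=self._run, daemon=True).start()

    def _run(self):
        while True:
            func, args = self._jobs.get()
            try:
                func(*args)
            finally:
                self._jobs.task_done()

    def insert_job(self, func, *args):
        self._jobs.put((func, args))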
def __init__(self, ip, port, node_list, directory, timeout):
    self.file_transfer_service = FileTransferService(ip=ip,
                                                     port=3002,
                                                     directory=directory)
    # CheckFileService requires the transfer service (see its __init__
    # above), so it is created second and handed the instance
    self.check_file_service = CheckFileService(
        ip=ip,
        port=3001,
        nodes=node_list,
        timeout=timeout,
        directory=directory,
        file_transfer_service=self.file_transfer_service,
    )
    self.directory = directory
    self.threadpool = ThreadPool(2)
def main():
    node_list = ["0.0.0.0", "localhost"]
    main_pool = ThreadPool(3)
    discovery_service = DiscoveryService(ip="127.0.0.1", port=3000,
                                         initial_nodes=node_list, period=5)
    file_service = FileService(ip="127.0.0.1", port=3001,
                               node_list=node_list, directory='files/',
                               timeout=5)
    main_pool.add_task(discovery_service.start_service)
    main_pool.add_task(file_service.start_service)
    main_pool.wait_completion()
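# A minimal sketch of the add_task/wait_completion pool the services above
# share, built on the classic Queue-plus-daemon-threads recipe; an assumed
# implementation, not the project's own ThreadPool.
import queue
import threading

class ThreadPool:
    def __init__(self, num_threads):
        self.tasks = queue.Queue()
        for _ in range(num_threads):
            threading.Thread(target=self._worker, daemon=True).start()

    def _worker(self):
        while True:
            func, args, kwargs = self.tasks.get()
            try:
                func(*args, **kwargs)
            finally:
                self.tasks.task_done()

    def add_task(self, func, *args, **kwargs):
        self.tasks.put((func, args, kwargs))

    def wait_completion(self):
        # Block until every queued task has been processed
        self.tasks.join()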
def start(self):
    with ThreadPool( self.max_jobs ) as tp:
        for url_to_visit in self.urls_provider:
            if not self.exclusions.isExcluded( url_to_visit ):
                logging.info( f"visiting url {url_to_visit.value}..." )
                try:
                    self._waitUntilWorkingHour()
                    w = Worker( self.user_agent, self.sentenceProcessor,
                                self.urlProcessor, self.webSiteInfoProvider,
                                self.MINIMUM_WORDS_PER_SENTENCE,
                                url_to_visit.value )
                    tp.addWorker( w )
                except Exception as ex:
                    logging.error( f"Error fetching url {url_to_visit.value}" )
                    logging.error( ex )
def __init__(self, args):
    # Maximum crawl depth
    self.depth = args.depth
    # Current crawl depth, starting from 1
    self.currentDepth = 1
    # Database
    self.database = Database(args.dbFile)
    # Thread pool with the given thread count
    self.threadPool = ThreadPool(args.threadNum)
    # Links already visited
    self.visitedHrefs = set()
    # Pages waiting to be visited
    self.unvisitedHrefs = deque()
    # First page to visit
    self.url = args.url
    self.unvisitedHrefs.append(args.url)
    # Whether the crawler has started working
    self.isCrawling = False
def __init__(self, args):
    # Maximum crawl depth
    self.depth = args.depth
    # Initial crawl depth, starting from 1
    self.currentDepth = 1
    # Keyword, decoded with the console's default encoding
    self.keyword = args.keyword.decode(getdefaultlocale()[1])
    # Database
    self.database = Database()
    # Thread pool with the given thread count
    self.threadPool = ThreadPool(args.threadNum)
    # Links already visited
    self.visitedHrefs = set()
    # Links waiting to be visited
    self.unvisitedHrefs = deque()
    # Add the first link to visit
    self.unvisitedHrefs.append(args.url)
    # Whether the crawler has started working
    self.isCrawling = False
def saveProxies():
    threadPool = ThreadPool(30)
    threadPool.startThreads()
    proxyFileOK = open('proxyOK.txt', 'a')
    proxyFileFail = open('proxyFail.txt', 'a')
    for proxy in proxiex:
        threadPool.putTask(checkProxy, proxy)
    while threadPool.getTaskLeft():
        flag, proxy = threadPool.getTaskResult()
        print flag, proxy
        if flag == 'ok':
            proxyFileOK.write(proxy + '\n')
        else:
            proxyFileFail.write(proxy + '\n')
    threadPool.stopThreads()
    proxyFileOK.close()
    proxyFileFail.close()
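# A minimal sketch of the startThreads/putTask/getTaskLeft/getTaskResult/
# stopThreads interface the proxy checkers above rely on: worker threads
# consume queued tasks and push each task's return value onto a result
# queue. An assumed implementation, not the original ThreadPool.
import queue
import threading

class ThreadPool:
    def __init__(self, num_threads):
        self.num_threads = num_threads
        self.taskQueue = queue.Queue()
        self.resultQueue = queue.Queue()
        self.running = False
        self.threads = []

    def startThreads(self):
        self.running = True
        for _ in range(self.num_threads):
            t = threading.Thread(target=self._work)
            t.start()
            self.threads.append(t)

    def _work(self):
        while self.running:
            try:
                func, args = self.taskQueue.get(timeout=1)
            except queue.Empty:
                continue
            self.resultQueue.put(func(*args))
            self.taskQueue.task_done()

    def putTask(self, func, *args):
        self.taskQueue.put((func, args))

    def getTaskLeft(self):
        # Tasks queued or in flight, plus results not yet collected
        return self.taskQueue.unfinished_tasks + self.resultQueue.qsize()

    def getTaskResult(self):
        return self.resultQueue.get()

    def stopThreads(self):
        self.running = False
        for t in self.threads:
            t.join()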
def __init__(self, dbName, threadNum, logLevel, startUrls, depth,
             keyword, downloadMode):
    self.__threadNum = threadNum
    self.__startUrls = startUrls
    self.__depth = depth
    self.__keyword = keyword
    self.__downloadMode = downloadMode
    self.__dbName = dbName
    self.__logLevel = logLevel
    self.__exitEvent = threading.Event()
    # URL queue: holds URL nodes waiting to be downloaded
    self.__urlQueue = Queue.Queue()
    # HTML queue: holds downloaded HTML nodes waiting to be parsed
    self.__htmlQueue = Queue.Queue()
    # Data queue: holds parsed HTML nodes that qualify for database storage
    self.__dataQueue = Queue.Queue()
    # Download queues assigned to the individual download modules
    self.__downloadQueueList = []
    # Create the thread pool
    self.__threadPool = ThreadPool(threadNum + 2)
    self.__downloadingFlag = 0
def main(server, fileManager):
    thread_pool = ThreadPool(thread_number=2,
                             target=worker,
                             args=(server, handler, '/var/www/html', fileManager))
    thread_pool.start()
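# A minimal sketch of the constructor-style pool used above: it creates
# thread_number threads that all run the same target with the same args,
# and start() launches them. An assumed implementation, not the server's
# own ThreadPool.
import threading

class ThreadPool:
    def __init__(self, thread_number, target, args=()):
        self._threads = [threading.Thread(target=target, args=args)
                         for _ in range(thread_number)]

    def start(self):
        for t in self._threads:
            t.start()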
def __init__(self, ip, port, directory):
    super(FileTransferService, self).__init__(name='FileTransfer', ip=ip, port=port)
    self.directory = directory
    self.threadpool = ThreadPool(2)