示例#1
0
 def _do(self):
     """Increase the cached process counter by one and store it back.

     The counter is read from the cache under ``const.PROCESSCNTKEY``.
     When nothing is stored yet (the lookup yields ``None``) the counter
     starts at 1.

     Returns:
         Always ``True``.
     """
     previous = MemcacheUtil.get(const.PROCESSCNTKEY)
     # A missing entry counts as "zero so far", so the first call stores 1.
     current = 1 if previous is None else previous + 1
     MemcacheUtil.set(const.PROCESSCNTKEY, current)
     logger.debug("process cnt:" + str(current))
     return True
示例#2
0
 def _do(self):
     """Add one to the process counter kept in the cache.

     Looks up ``const.PROCESSCNTKEY``; if the cache returns ``None`` the
     counter is initialised to 1, otherwise the stored value plus one is
     written back. The new value is logged at debug level.

     Returns:
         Always ``True``.
     """
     key = const.PROCESSCNTKEY
     stored = MemcacheUtil.get(key)
     if stored is None:
         updated = 1
     else:
         updated = stored + 1
     MemcacheUtil.set(key, updated)
     logger.debug("process cnt:" + str(updated))
     return True
示例#3
0
文件: test.py 项目: gandaruvu/SpiderZ
    def testGetUrl():
        """Fetch one hard-coded page, scan it, and store every scan result.

        Steps, in order: call ``clean()`` on ``PyMongoUtil`` and
        ``MemcacheUtil``, instantiate ``SpiderBloomFilter``, download the page
        with ``GetWords.get_content``, pass it to ``UrlScan.scanpage`` and
        write each returned item through ``PyMongoUtil.write``. Finally the
        number of items is printed.
        """
        # Single definition of the page under test; it is both fetched and
        # handed to the scanner.
        page_url = "http://www.leakedin.com/tag/emailpassword-dump/"

        PyMongoUtil.clean()
        MemcacheUtil.clean()
        # Instance is discarded; the constructor is called for its effects.
        SpiderBloomFilter()

        html = GetWords.get_content(page_url)
        # Named ``found`` rather than ``list`` so the builtin is not shadowed.
        found = UrlScan.scanpage(html, page_url, None)

        for item in found:
            PyMongoUtil.write(item, [""])
        # Parenthesised form prints the same on Python 2 and parses on Python 3.
        print(len(found))
示例#4
0
文件: test.py 项目: zzmzz/SpiderZ
    def testGetUrl():
        """Scan a single hard-coded page and persist whatever the scan returns.

        Calls ``clean()`` on ``PyMongoUtil`` and ``MemcacheUtil``, instantiates
        ``SpiderBloomFilter``, fetches the page via ``GetWords.get_content``,
        runs ``UrlScan.scanpage`` on it, writes every returned item with
        ``PyMongoUtil.write`` and prints how many items there were.
        """
        # The same address is used for the download and for the scan call.
        target = "http://www.leakedin.com/tag/emailpassword-dump/"

        PyMongoUtil.clean()
        MemcacheUtil.clean()
        # The constructed object is not kept; only construction matters here.
        SpiderBloomFilter()

        html = GetWords.get_content(target)
        # Avoid the name ``list`` so the builtin stays usable in this scope.
        scanned = UrlScan.scanpage(html, target, None)

        for entry in scanned:
            PyMongoUtil.write(entry, [""])
        # print(...) with one argument behaves identically on Python 2 and 3.
        print(len(scanned))
示例#5
0
文件: test.py 项目: gandaruvu/SpiderZ
def trytry():
    """Run one crawl of a hard-coded page in regex mode, then count words.

    Steps, in order: call ``clean()`` on ``PyMongoUtil`` and
    ``MemcacheUtil``, instantiate ``SpiderBloomFilter``, obtain a queue and a
    lock from ``PyPool``, create a ``MyListener``, start a ``Spider`` built
    from a ``SpiderStrategy`` that uses a ``Regex`` mode, hand the lock and
    queue to the listener, and finally call ``WordCount.calc_count()``.
    """
    start_url = "http://www.leakedin.com/tag/emailpassword-dump/"
    # Raw string: the backslashes belong to the regex, not to Python's string
    # parser. The value is identical to the previous non-raw literal, but it
    # no longer relies on unrecognized escape sequences.
    # Pattern: an e-mail-like token followed by optional ':' ',' '|' and the
    # rest of the text.
    email_pattern = r"[a-z0-9\-\._]+@[a-z0-9\-\.]+\.[a-z]{2,4}[:,\|]*.*"

    PyMongoUtil.clean()
    MemcacheUtil.clean()
    # Instance is discarded; the constructor is called for its effects.
    SpiderBloomFilter()

    queue = PyPool.get_queue()
    lock = PyPool.get_lock()
    listener = MyListener()

    regex_mode = Regex(email_pattern)
    # NOTE(review): the positional 2 is presumably a crawl depth - confirm
    # against SpiderStrategy's signature.
    strategy = SpiderStrategy(start_url, 2, is_out=False, pattern=None,
                              mode=regex_mode)

    Spider(strategy).get_all_words(queue, lock)
    listener.listen(lock, queue)
    WordCount.calc_count()
示例#6
0
文件: test.py 项目: zzmzz/SpiderZ
def trytry():
    """Crawl a fixed page with a regex-based spider mode and tally the words.

    In order, this calls ``clean()`` on ``PyMongoUtil`` and ``MemcacheUtil``,
    instantiates ``SpiderBloomFilter``, takes a queue and a lock from
    ``PyPool``, builds a ``MyListener``, launches ``Spider.get_all_words``
    with a ``SpiderStrategy`` in ``Regex`` mode, lets the listener consume
    the queue, and then calls ``WordCount.calc_count()``.
    """
    seed_url = "http://www.leakedin.com/tag/emailpassword-dump/"
    # Written as a raw string so the regex escapes are not interpreted (or
    # warned about) by Python's string-literal parser. The resulting string
    # value is unchanged.
    # Shape: e-mail-like token, optional ':' ',' '|' separators, then anything.
    credential_pattern = r"[a-z0-9\-\._]+@[a-z0-9\-\.]+\.[a-z]{2,4}[:,\|]*.*"

    PyMongoUtil.clean()
    MemcacheUtil.clean()
    # The object itself is not used; only its construction matters here.
    SpiderBloomFilter()

    queue = PyPool.get_queue()
    lock = PyPool.get_lock()
    listener = MyListener()

    mode = Regex(credential_pattern)
    # NOTE(review): meaning of the positional 2 is not visible here
    # (presumably depth) - confirm in SpiderStrategy.
    strategy = SpiderStrategy(seed_url,
                              2,
                              is_out=False,
                              pattern=None,
                              mode=mode)

    Spider(strategy).get_all_words(queue, lock)
    listener.listen(lock, queue)
    WordCount.calc_count()
示例#7
0
from SpiderUtils.bloomFilter import SpiderBloomFilter
from SpiderUtils.spider import Spider
from SpiderUtils.spiderStrategy import SpiderStrategy
from Statics.wordCount import WordCount
from Utils.logFactory import LogFactory
from SpiderUtils.SpiderMode.regexMode import Regex
from SpiderUtils.enums import Language
from SpiderUtils.getWords import GetWords
from PyIO.excelUtil import ExcelUtil
from os import path

# Module-level logger for this script, obtained from LogFactory as "main".
logger = LogFactory.getlogger("main")

# Clean old data: run the clean() helpers before any crawling starts.
# NOTE(review): presumably this removes results left by a previous run from
# MongoDB and memcached - confirm in PyMongoUtil / MemcacheUtil.
PyMongoUtil.clean()
MemcacheUtil.clean()

# Create the bloom filter. The instance is not kept, so the constructor is
# invoked only for whatever it sets up internally.
SpiderBloomFilter()

# Multitask preparation: the queue and lock come from PyPool and are kept as
# module globals together with the listener instance.
queue = PyPool.get_queue()
lock = PyPool.get_lock()
listener = MyListener()


def err():
    """Print a fixed message asking the user to enter a valid selection."""
    message = "please enter the right select"
    print(message)


while True:
示例#8
0
            try:
                lock.acquire()
                size = queue.qsize()
                size = PyPool.limit if size > PyPool.limit else size
                for num in range(0, size):
                    strategy = queue.get_nowait()
                    ProcessCntIncrease().lock_and_do()
                    self.__pool.apply_async(apply_spider, (strategy, queue, lock))
            except Exception, e:
                logger.error(e)
            finally:
                lock.release()

            try:
                lock.acquire()
                length = MemcacheUtil.get(const.PROCESSCNTKEY)
                size = queue.qsize()
                if size == 0 and (length is None or length == 0):
                    loop_flag = False
                else:
                    time.sleep(3)
            except Exception, e:
                logger.error(str(e))
            finally:
                lock.release()

        self.__pool.close()
        logger.info("start to wait for all processes")
        self.__pool.join()
        return
示例#9
0
文件: listener.py 项目: zzmzz/SpiderZ
                lock.acquire()
                size = queue.qsize()
                size = PyPool.limit if size > PyPool.limit else size
                for num in range(0, size):
                    strategy = queue.get_nowait()
                    ProcessCntIncrease().lock_and_do()
                    self.__pool.apply_async(apply_spider,
                                            (strategy, queue, lock))
            except Exception, e:
                logger.error(e)
            finally:
                lock.release()

            try:
                lock.acquire()
                length = MemcacheUtil.get(const.PROCESSCNTKEY)
                size = queue.qsize()
                if size == 0 and (length is None or length == 0):
                    loop_flag = False
                else:
                    time.sleep(3)
            except Exception, e:
                logger.error(str(e))
            finally:
                lock.release()

        self.__pool.close()
        logger.info("start to wait for all processes")
        self.__pool.join()
        return