def __init__(self, path, debug):
    """Initialise the parent filter, then attach database-backed state.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
        debug: Forwarded unchanged to ``RFPDupeFilter.__init__``.
    """
    RFPDupeFilter.__init__(self, path, debug)

    # Keep both the connection and a cursor opened on it.
    connection = dbconnection.getConnection()
    self.db = connection
    self.cur = connection.cursor()

    # NOTE(review): ``task`` is neither a parameter nor a local of this
    # method; presumably it is a module-level name -- confirm it exists.
    self.task = task

    # Runs after db/cur/task are assigned, in the same order as before.
    self.urls = self.loadFromDB()
    self.filter = getPersistFilter(self.task)
def __init__(self, path=None, debug=False):
    """Reserve a bloom filter in Redis and initialise the parent filter.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
        debug: Forwarded to ``RFPDupeFilter.__init__`` so the caller's
            choice actually takes effect.
    """
    logging.info("init redis bloomFilter")
    self.key = "url"
    self.redis_client = redis.Redis(host='127.0.0.1', port=6379)
    error_rate = 0.001
    initial_size = 1000
    try:
        # bf.reserve takes three arguments: key, error_rate and
        # initial_size. The lower the error rate, the more space is
        # needed. initial_size is the expected number of elements to be
        # put into the bloom filter; once the real count exceeds it, the
        # false-positive rate rises. The default parameters are
        # error_rate=0.01, initial_size=100.
        self.redis_client.execute_command(
            "bf.reserve", self.key, error_rate, initial_size
        )
    except ResponseError as e:
        # Best effort: a server-side rejection is logged, not raised.
        # NOTE(review): presumably this fires when the key already
        # exists -- confirm.
        logging.info(e)
    RFPDupeFilter.__init__(self, path, debug)
def __init__(self, path=None):
    """Create the scalable bloom filter, then initialise the parent filter.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
    """
    growth_mode = ScalableBloomFilter.SMALL_SET_GROWTH
    self.urls_sbf = ScalableBloomFilter(mode=growth_mode)
    RFPDupeFilter.__init__(self, path)
def __init__(self, path=None):
    """Delegate construction to ``RFPDupeFilter``.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
    """
    # No state of its own is set up here; the parent does all the work.
    RFPDupeFilter.__init__(self, path)
def __init__(self, path=None, debug=False):
    """Create the in-memory URL set and initialise the parent filter.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
        debug: Forwarded to ``RFPDupeFilter.__init__`` so the caller's
            choice actually takes effect.
    """
    self.urls_seen = set()
    RFPDupeFilter.__init__(self, path, debug)
def __init__(self, path=None, debug=False):
    """Initialise the parent filter and open a Redis client.

    Args:
        path: Forwarded to ``RFPDupeFilter.__init__``.
        debug: Forwarded to ``RFPDupeFilter.__init__``.
    """
    # Pass the caller's values through; hard-coding ``None``/``False`` here
    # would silently ignore whatever the caller supplied.
    RFPDupeFilter.__init__(self, path=path, debug=debug)
    self.rclient = redis.StrictRedis(host="localhost", port=6379, db=0)
def __init__(self, path=None, other=None):
    """Preload URLs from MongoDB, then initialise the parent filter.

    The URLs of all ``nbbs.dsl`` documents matching ``{'out': 1}`` are
    collected into ``self.already_seen``.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
        other: Forwarded as the second positional argument of
            ``RFPDupeFilter.__init__``.
    """
    client = MongoClient(settings['DBINFO'])
    try:
        matching_docs = client.nbbs.dsl.find({'out': 1})
        # Build the set directly; no intermediate list is needed.
        self.already_seen = set(doc['url'] for doc in matching_docs)
    finally:
        # The client is only needed for this one query, so release its
        # connection instead of leaking it.
        client.close()
    RFPDupeFilter.__init__(self, path, other)
def __init__(self, path=None, debug=False):
    """Initialise the parent filter and create the URL filter helper.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
        debug: Forwarded to ``RFPDupeFilter.__init__`` so the caller's
            choice actually takes effect.
    """
    RFPDupeFilter.__init__(self, path, debug)
    self.dupefilter = UrlFilterAndAdd()
def __init__(self, path=None, debug=True):
    """Obtain the Redis helper, then initialise the parent filter.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
        debug: Forwarded unchanged to ``RFPDupeFilter.__init__``; note
            that it defaults to ``True`` here.
    """
    helper = RedisHelper.get_instance()
    self.redis_client = helper
    RFPDupeFilter.__init__(self, path, debug)
def __init__(self, path=None):
    """Start with an empty URL set, then initialise the parent filter.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
    """
    # Assigned before the parent initialiser runs, as in the original order.
    self.url_seen = set()
    RFPDupeFilter.__init__(self, path)
def __init__(self, path=None, debug=None):
    """Initialise the parent filter and start with an empty fingerprint map.

    Args:
        path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
        debug: Forwarded unchanged to ``RFPDupeFilter.__init__``.
    """
    RFPDupeFilter.__init__(self, path, debug)
    # NOTE(review): assigned after the parent initialiser, so this replaces
    # anything the parent stored under the same attribute name -- confirm
    # that is intended.
    self.fingerprints = {}
    # Parenthesised single-argument form: prints the same text on Python 2
    # and is valid syntax on Python 3.
    print("[***] filter running!")
def __init__(self, path=None, debug=False):
    """Delegate construction to ``RFPDupeFilter``.

    Args:
        path: Forwarded by keyword to ``RFPDupeFilter.__init__``.
        debug: Forwarded by keyword to ``RFPDupeFilter.__init__``.
    """
    # Both arguments are passed by keyword, exactly as received.
    RFPDupeFilter.__init__(
        self,
        path=path,
        debug=debug,
    )