def test_database_type(self):
    """Check that NoSQL rejects unknown engine names and accepts "redis".

    Each invalid name gets its own ``assertRaises`` context. A ``with``
    block is left at the first exception, so placing several constructor
    calls inside a single block would leave every call after the first
    one unexecuted and therefore unverified.
    """
    param = {}

    # test wrong database type
    for bad_name in ("rdeis", "Redis", "BDb"):
        with self.assertRaises(ValueError):
            NoSQL(bad_name, param)

    # test right database type
    try:
        NoSQL("redis", param)
    except Exception:
        self.fail("Correct database name:redis, unexpected exceptions")
def UrlChecker(job, param, headers):
    """Tell whether the URL of *job* is still fresh in the URL cache.

    The cache record for ``job.identifier`` is a pickled dict holding the
    keys ``"url"`` and ``"last-modified"``. The record is (re)written
    whenever it is missing, belongs to a different URL, or is at least
    ``param["crawlperiod"]`` hours older than the current modification time.

    Args:
        job: object exposing ``identifier`` and ``url`` attributes.
        param: configuration dict; reads ``param["database"]`` (``engine``,
            ``host``, ``port``, ``db["urlcache"]``) and ``param["crawlperiod"]``.
        headers: mapping of response headers; ``"last-modified"`` is used
            when present and parseable, otherwise the current GMT time.

    Returns:
        A ``(cached, record)`` tuple. ``cached`` is True only when the
        stored record matches the URL and is younger than the crawl period;
        ``record`` is the dict that is now stored for the identifier.
    """
    last_update = None
    if "last-modified" in headers:
        # parsedate() yields None for a malformed date instead of raising.
        last_update = eut.parsedate(headers["last-modified"])
    if last_update is None:
        # Missing or unparseable header: fall back to "now" so that a None
        # is never cached and later handed to mktime().
        last_update = eut.parsedate(
            strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime()))

    db_conf = param["database"]
    db = NoSQL(
        db_conf["engine"],
        {
            "host": db_conf["host"],
            "port": db_conf["port"],
            "db": db_conf["db"]["urlcache"],
        })
    result_str = db.get(job.identifier)

    # For the url that has never been cached before or deleted by LRU
    if result_str is None:
        result = {
            "last-modified": last_update,
            "url": job.url,
        }
        # TODO: shouldn't pickle at this level
        db.set(job.identifier, pk.dumps(result))
        return False, result

    # NOTE(security): unpickling executes arbitrary code if the cache
    # contents can be tampered with; only safe while the store is trusted.
    result = pk.loads(result_str)

    if result["url"] != job.url:
        # For the urls that is not cached but has the same identifier
        result["url"] = job.url
        stale = True
    else:
        cached_date = result["last-modified"]
        if cached_date is None:
            # A record without a usable date cannot be compared; refresh it.
            stale = True
        else:
            hour_diff = (mktime(last_update) - mktime(cached_date)) / 3600
            stale = hour_diff >= param["crawlperiod"]

    if stale:
        result["last-modified"] = last_update
        db.set(job.identifier, pk.dumps(result))
        return False, result
    return True, result
def UrlChecker(job, param, headers):
    """Tell whether the URL of *job* is still fresh in the URL cache.

    The cache record for ``job.identifier`` is a pickled dict holding the
    keys ``"url"`` and ``"last-modified"``. The record is (re)written
    whenever it is missing, belongs to a different URL, or is at least
    ``param["crawlperiod"]`` hours older than the current modification time.

    Args:
        job: object exposing ``identifier`` and ``url`` attributes.
        param: configuration dict; reads ``param["database"]`` (``engine``,
            ``host``, ``port``, ``db["urlcache"]``) and ``param["crawlperiod"]``.
        headers: mapping of response headers; ``"last-modified"`` is used
            when present and parseable, otherwise the current GMT time.

    Returns:
        A ``(cached, record)`` tuple. ``cached`` is True only when the
        stored record matches the URL and is younger than the crawl period;
        ``record`` is the dict that is now stored for the identifier.
    """
    last_update = None
    if "last-modified" in headers:
        # parsedate() yields None for a malformed date instead of raising.
        last_update = eut.parsedate(headers["last-modified"])
    if last_update is None:
        # Missing or unparseable header: fall back to "now" so that a None
        # is never cached and later handed to mktime().
        last_update = eut.parsedate(
            strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime()))

    db_conf = param["database"]
    db = NoSQL(
        db_conf["engine"],
        {
            "host": db_conf["host"],
            "port": db_conf["port"],
            "db": db_conf["db"]["urlcache"],
        })
    result_str = db.get(job.identifier)

    # For the url that has never been cached before or deleted by LRU
    if result_str is None:
        result = {
            "last-modified": last_update,
            "url": job.url,
        }
        # TODO: shouldn't pickle at this level
        db.set(job.identifier, pk.dumps(result))
        return False, result

    # NOTE(security): unpickling executes arbitrary code if the cache
    # contents can be tampered with; only safe while the store is trusted.
    result = pk.loads(result_str)

    if result["url"] != job.url:
        # For the urls that is not cached but has the same identifier
        result["url"] = job.url
        stale = True
    else:
        cached_date = result["last-modified"]
        if cached_date is None:
            # A record without a usable date cannot be compared; refresh it.
            stale = True
        else:
            hour_diff = (mktime(last_update) - mktime(cached_date)) / 3600
            stale = hour_diff >= param["crawlperiod"]

    if stale:
        result["last-modified"] = last_update
        db.set(job.identifier, pk.dumps(result))
        return False, result
    return True, result
def test_redis_init(self):
    """Exercise redis-backed NoSQL construction with good and bad parameters.

    Every bad parameter set is checked in its own ``assertRaises`` context.
    A ``with`` block is left at the first exception, so grouping all of
    them in one block would only ever verify the first call that raises.
    """
    # Parameter sets that must work: port as an int, then as a numeric string.
    good_params = (
        {"host": "localhost", "port": 6379, "db": 0},
        {"host": "localhost", "port": "6379", "db": 0},
    )
    for good_param in good_params:
        try:
            db = NoSQL("redis", good_param)
            db.set("foo", "bar")
        except Exception:
            # TODO: Change this to dictionary no key exception
            self.fail("Unexpected redis NoSQL instance init exception")

    # test bad init parameters
    # NOTE(review): the original comment on the first case read "Port number
    # cannot be string", yet that case passes a string *db* and the same
    # string port is accepted above -- confirm that each of these really is
    # expected to surface as redis.ConnectionError.
    bad_params = (
        {"host": "localhost", "port": "6379", "db": "0"},
        {"host": "big", "port": "6379", "db": 0},
        {"host": "localhost", "port": "dog", "db": 0},
        {"host": "localhost", "port": "6379", "db": "5f"},
        {"host": "localhost", "port": "6d379", "db": 0},
    )
    for bad_param in bad_params:
        with self.assertRaises(redis.ConnectionError):
            db = NoSQL("redis", bad_param)
            db.set("foo", "bar")
def worker6(value):
    """Read key "foo" from a redis NoSQL store built with ``db`` set to *value*.

    The fetched value is discarded; nothing is returned.
    """
    store = NoSQL("redis", {"db": value})
    store.get("foo")
def worker5(value):
    """Write "bar" under key "foo" in a redis NoSQL store with ``db`` = *value*.

    Nothing is returned.
    """
    store = NoSQL("redis", {"db": value})
    store.set("foo", "bar")
def worker4(value):
    """Read key "foo" from a redis NoSQL store built with empty parameters.

    *value* is accepted but not used; the fetched value is discarded.
    """
    settings = {}
    store = NoSQL("redis", settings)
    store.get("foo")
def worker3(pairs):
    """Read the key ``pairs[0]`` from a redis NoSQL store.

    The store is built with empty parameters; the fetched value is
    discarded and nothing is returned.
    """
    store = NoSQL("redis", {})
    key = pairs[0]
    store.get(key)
def worker2(value):
    """Store *value* under key "foo" in a redis NoSQL store.

    The store is built with empty parameters; nothing is returned.
    """
    settings = {}
    store = NoSQL("redis", settings)
    store.set("foo", value)
def worker(pairs):
    """Store ``pairs[1]`` under key ``pairs[0]`` in a redis NoSQL store.

    The store is built with empty parameters; nothing is returned.
    """
    store = NoSQL("redis", {})
    key = pairs[0]
    payload = pairs[1]
    store.set(key, payload)