def test_cache_clear_domain(self):
    """Saving pages for a domain and then clearing that domain removes
    both the on-disk domain folder and the in-memory index entries."""
    clear_cache_dir = os.path.join(TEST_DIRECTORY, "test_cache_clear_domain")
    os.makedirs(clear_cache_dir, exist_ok=True)
    page_bytes = "<html></html>".encode("utf-8")
    page_urls = [
        "http://www.example.com/testpage.html",
        "http://www.example.com/testpage2.html"
    ]
    cache = Cache(clear_cache_dir)
    # populate the cache with every test URL
    for page_url in page_urls:
        cache.save(page_url, page_bytes)
    # the domain folder and each cached file should now exist
    domain_dir = os.path.join(clear_cache_dir, "www_example_com")
    self.assertTrue(os.path.exists(domain_dir))
    for page_url in page_urls:
        page_domain = dir_domain(page_url)
        page_file = clean_url_hash(page_url)
        self.assertIn(page_domain, cache.sites)
        indexed_paths = cache.sites[page_domain]
        expected_path = os.path.join(domain_dir, page_file)
        self.assertIn(expected_path, indexed_paths)
        self.assertTrue(os.path.isfile(expected_path))
    # clearing the domain must drop both the index entry and the folder
    cache.clear_domain("www.example.com")
    self.assertNotIn("www_example_com", cache.sites)
    self.assertFalse(os.path.exists(domain_dir))
def test_dangerously_convert_cache_to_gzip(self):
    """A page saved through a plain Cache is still retrievable through a
    GzipCache after an in-place gzip conversion of the cache directory."""
    page_bytes = "<html></html>".encode("utf-8")
    page_url = "http://www.example.com/testpage.html"
    plain_cache = Cache(CONVERT_DIRECTORY)
    plain_cache.save(page_url, page_bytes)
    self.assertTrue(plain_cache.has(page_url))
    dangerously_convert_cache_to_gzip(CONVERT_DIRECTORY)
    # the converted entry must be visible via the gzip-aware cache
    gzip_cache = GzipCache(CONVERT_DIRECTORY)
    self.assertTrue(gzip_cache.has(page_url))
    # cleanup: wipe every domain folder created under the convert directory
    for entry in os.listdir(CONVERT_DIRECTORY):
        shutil.rmtree(os.path.join(CONVERT_DIRECTORY, entry))
def test_cache_save_existing(self):
    """save() on an already-indexed domain writes the new file to disk and
    records it in that domain's index entry."""
    cache = Cache(CACHE_DIRECTORY)
    page_bytes = "<html></html>".encode("utf-8")
    page_url = "http://www.example.com/testpage.html"
    page_domain = dir_domain(page_url)
    page_file = clean_url_hash(page_url)
    expected_path = os.path.join(CACHE_DIRECTORY, page_domain, page_file)
    # the domain is pre-existing, but this particular page is not cached yet
    self.assertNotIn(expected_path, cache.sites[page_domain])
    cache.save(page_url, page_bytes)
    # the file now exists on disk and is tracked in the domain index
    self.assertTrue(os.path.exists(expected_path))
    self.assertIn(expected_path, cache.sites[page_domain])
    # cleanup
    os.remove(expected_path)
def test_cache_save_new(self):
    """save() for a never-before-seen domain creates the domain folder and
    indexes the cached file under that new domain."""
    cache = Cache(CACHE_DIRECTORY)
    page_bytes = "<html></html>".encode("utf-8")
    page_url = "http://www.sample.com/testpage.html"
    page_domain = dir_domain(page_url)
    page_file = clean_url_hash(page_url)
    domain_dir = os.path.join(CACHE_DIRECTORY, page_domain)
    # the www_sample_com directory should not exist until the file is cached
    self.assertFalse(os.path.exists(domain_dir))
    self.assertNotIn(page_domain, cache.sites)
    cache.save(page_url, page_bytes)
    expected_path = os.path.join(domain_dir, page_file)
    self.assertIn(expected_path, cache.sites[page_domain])
    self.assertTrue(os.path.exists(expected_path))
    # remove this after the test is done
    shutil.rmtree(domain_dir)