class SafeBrowsingList(object):
    """
    Manages comparisons and data freshness

    Wraps a single chunk source chosen from ``source_url``: a ``FileSource``
    for ``file`` URLs (and for URLs missing a scheme or a network location),
    or an ``S3FileSource`` for ``s3+file`` URLs.  The source is loaded as
    soon as the list is constructed.
    """

    # Size of prefixes in bytes
    hash_size = 32
    prefix_size = 4
    # Stored under the "type" key of every fetch() result.
    # NOTE(review): "invalid" looks like a placeholder meant to be overridden
    # by subclasses -- confirm against the rest of the module.
    type = "invalid"

    def __init__(self, list_name, source_url, settings):
        """
        Pick the source implementation for ``source_url`` and load it.

        :param list_name: name of the list, kept as ``self.name``
        :param source_url: location of the list data, kept as
            ``self.source_url`` and parsed into ``self.url``
        :param settings: mapping read with ``.get()``; the
            ``refresh_check_interval`` key defaults to ``10 * 60`` and is
            handed to the source as ``refresh_interval``
        :raises ValueError: when the URL has both a scheme and a network
            location and the scheme is neither ``file`` nor ``s3+file``
        """
        self.name = list_name
        self.source_url = source_url
        self.url = urlparse(source_url)
        self.settings = settings

        scheme = self.url.scheme.lower()
        interval = settings.get("refresh_check_interval", 10 * 60)

        # A URL without a scheme or without a netloc is treated like a
        # local file, whatever its scheme is.
        if scheme == "file" or not (self.url.scheme and self.url.netloc):
            self._source = FileSource(self.source_url,
                                      refresh_interval=interval)
        elif scheme == "s3+file":
            self._source = S3FileSource(self.source_url,
                                        refresh_interval=interval)
        else:
            raise ValueError("Only local single files and S3 single files "
                             "sources supported at this time")
        self._source.load()

    def refresh(self):
        """
        Ask the underlying source to refresh itself.

        The source's return value is not propagated; this always returns
        ``None``.
        """
        self._source.refresh()

    def delta(self, adds, subs):
        """
        Calculates the delta necessary for a given client to catch up to the
        server's idea of "current"

        This current iteration is a very simplistic algorithm: a plain set
        difference between the source's chunks and the ones passed in.

        :param adds: iterable of add chunks to exclude from the result
        :param subs: iterable of sub chunks to exclude from the result
        :returns: ``(add_delta, sub_delta)``, each a sorted list of the
            chunks reported by the source that are absent from the
            corresponding argument
        """
        current_adds, current_subs = self._source.list_chunks()

        # FIXME Should we call issuperset() first to be sure we're not getting
        # weird stuff from the request?
        a_delta = current_adds.difference(adds)
        s_delta = current_subs.difference(subs)
        return sorted(a_delta), sorted(s_delta)

    def fetch(self, add_chunks=None, sub_chunks=None):
        """
        Fetch chunk details from the source and tag them with this list's
        ``type``.

        :param add_chunks: add chunks to request; defaults to an empty list
        :param sub_chunks: sub chunks to request; defaults to an empty list
        :returns: whatever the source's ``fetch()`` returns, with its
            ``"type"`` entry set to ``self.type``
        """
        # Build fresh lists on every call: a shared mutable default would
        # carry over anything the source did to it into later calls.
        if add_chunks is None:
            add_chunks = []
        if sub_chunks is None:
            sub_chunks = []

        details = self._source.fetch(add_chunks, sub_chunks)
        details["type"] = self.type
        return details

    def fetch_adds(self, add_chunks):
        """Return only the ``"adds"`` entry of ``fetch(add_chunks, [])``."""
        return self.fetch(add_chunks, [])["adds"]

    def fetch_subs(self, sub_chunks):
        """Return only the ``"subs"`` entry of ``fetch([], sub_chunks)``."""
        return self.fetch([], sub_chunks)["subs"]

    def find_prefix(self, prefix):
        """
        Look ``prefix`` up in the source.

        :returns: an empty tuple when ``len(prefix)`` differs from
            ``prefix_size`` (the source is not consulted), otherwise
            whatever the source's ``find_prefix()`` returns
        """
        # Don't bother looking for prefixes that aren't the right size
        if len(prefix) != self.prefix_size:
            return ()
        return self._source.find_prefix(prefix)
def test_refresh(self):
    # FIXME Timing issues causing intermittent failures.
    #
    # Scenario: right after load() a refresh() must report falsy; once the
    # backing file has been rewritten and its mtime pushed forward,
    # needs_refresh() must report truthy.
    path = self.source.name
    file_source = FileSource("file://" + path, 0.5)
    file_source.load()
    refreshed = file_source.refresh()
    self.assertFalse(refreshed)

    # Rewrite the fixture file from the beginning and rewind it again.
    self.source.seek(0)
    payload = "%s\n%s" % (self.add, self.sub)
    self.source.write(payload)
    self.source.flush()
    self.source.seek(0)

    # Keep the access time, move the modification time 2 seconds ahead.
    stat_info = os.stat(path)
    bumped_mtime = stat_info.st_mtime + 2
    os.utime(path, (stat_info.st_atime, bumped_mtime))

    stale = file_source.needs_refresh()
    self.assertTrue(stale)
def test_list_chunks(self):
    # A freshly loaded FileSource should list chunk 17 in the first set
    # and chunk 18 in the second set of the pair it returns.
    source_url = "file://" + self.source.name
    file_source = FileSource(source_url, 1)
    file_source.load()

    listed = file_source.list_chunks()
    expected = (set([17]), set([18]))
    self.assertEqual(listed, expected)
def test_load(self):
    # After load(), the source's chunks must compare equal to a ChunkList
    # built from the simple_adds / simple_subs fixtures.
    source_url = "file://" + self.source.name
    file_source = FileSource(source_url, 1)
    file_source.load()

    loaded_chunks = file_source.chunks
    expected_chunks = ChunkList(add_chunks=simple_adds,
                                sub_chunks=simple_subs)
    self.assertEqual(loaded_chunks, expected_chunks)
def test_load(self):
    # Loading the fixture file must yield chunks equal to a ChunkList made
    # from simple_adds and simple_subs.
    # NOTE(review): another test_load with the same body is defined just
    # before this one; if both sit in the same class, this later definition
    # replaces the earlier one -- confirm which is intended.
    src = FileSource("file://" + self.source.name, 1)
    src.load()

    actual = src.chunks
    wanted = ChunkList(add_chunks=simple_adds, sub_chunks=simple_subs)
    self.assertEqual(actual, wanted)