def fetch(self, fingerprints):
    """Load the states of not-yet-cached fingerprints into the state cache.

    Fingerprints already present in ``self._state_cache`` are skipped. The
    remaining ones are looked up in the table named ``self._table_name`` in
    chunks of 65536 row keys, requesting only the ``s:state`` column. Each
    returned value is unpacked as one unsigned byte and stored in the cache
    under the hex form of its row key. Rows returned without an ``s:state``
    cell leave the cache untouched.

    :param fingerprints: sized collection of hex-encoded fingerprints.
    """
    to_fetch = [f for f in fingerprints if f not in self._state_cache]
    # Single-argument parenthesised print emits the same text on Python 2
    # and is also valid syntax on Python 3.
    print("to fetch %d from %d" % (len(to_fetch), len(fingerprints)))
    print("cache size %s" % len(self._state_cache))
    if not to_fetch:
        # Everything is cached already; do not touch the connection at all.
        return

    # The table handle does not depend on the chunk, so obtain it once
    # instead of once per chunk.
    table = self.connection.table(self._table_name)
    for chunk in chunks(to_fetch, 65536):
        keys = [unhexlify(fprint) for fprint in chunk]
        for key, cells in table.rows(keys, columns=['s:state']):
            if 's:state' in cells:
                # '>B' yields a 1-tuple holding the unsigned byte value.
                self._state_cache[hexlify(key)] = unpack('>B', cells['s:state'])[0]
def flush(self, force_clear):
    """Write every cached state to the table and optionally empty the cache.

    All ``(fingerprint, state)`` pairs in ``self._state_cache`` are written
    to the table named ``self._table_name`` in chunks of 32768 items, each
    chunk inside its own ``transaction=True`` batch. The row key is the
    un-hexlified fingerprint and the row value comes from
    ``prepare_hbase_object(state=...)``.

    :param force_clear: when truthy, the cache is emptied after writing.
        The cache is also emptied, regardless of this flag, when it holds
        more than 3000000 entries.
    """
    # An oversized cache is always cleared, whatever the caller asked for.
    should_clear = force_clear or len(self._state_cache) > 3000000

    target = self.connection.table(self._table_name)
    for batch_items in chunks(self._state_cache.items(), 32768):
        with target.batch(transaction=True) as writer:
            for fingerprint, cached_state in batch_items:
                payload = prepare_hbase_object(state=cached_state)
                writer.put(unhexlify(fingerprint), payload)

    if not should_clear:
        return
    print("Cache has %d items, clearing" % len(self._state_cache))
    self._state_cache.clear()
def fetch(self, fingerprints):
    """Load the states of not-yet-cached fingerprints into the state cache.

    Fingerprints already present in ``self._state_cache`` are skipped. The
    remaining ones are looked up in the table named ``self._table_name`` in
    chunks of 65536 row keys, requesting only the ``s:state`` column. Each
    returned value is unpacked as one unsigned byte and stored in the cache
    under the hex form of its row key. Rows returned without an ``s:state``
    cell leave the cache untouched.

    :param fingerprints: sized collection of hex-encoded fingerprints.
    """
    to_fetch = [f for f in fingerprints if f not in self._state_cache]
    # Pass the values as logger arguments so the message is only formatted
    # when debug logging is actually enabled.
    self.logger.debug("cache size %s", len(self._state_cache))
    self.logger.debug("to fetch %d from %d", len(to_fetch), len(fingerprints))
    if not to_fetch:
        # Everything is cached already; do not touch the connection at all.
        return

    # The table handle does not depend on the chunk, so obtain it once
    # instead of once per chunk.
    table = self.connection.table(self._table_name)
    for chunk in chunks(to_fetch, 65536):
        keys = [unhexlify(fprint) for fprint in chunk]
        for key, cells in table.rows(keys, columns=["s:state"]):
            if "s:state" in cells:
                # NOTE(review): if hexlify is binascii.hexlify and this runs
                # on Python 3, the cache key is bytes - confirm it matches
                # the type of the fingerprints callers pass in.
                self._state_cache[hexlify(key)] = unpack(">B", cells["s:state"])[0]