def _sync_hash_prefix_cache(self):
    """Synchronize the local hash prefix cache with the server.

    Applies every threat-list update response (removals, additions, or a
    full rewrite), verifies each resulting checksum, and finally records
    the new client state for all updated lists in one batch.

    Raises:
        Exception: if a local list checksum disagrees with the server's.
    """
    self.api_client.fair_use_delay()
    current_state = self.storage.get_client_state()
    pending_client_state = {}
    for update in self.api_client.get_threats_update(current_state):
        updated_list = ThreatList(update['threatType'],
                                  update['platformType'],
                                  update['threatEntryType'])
        if update['responseType'] == 'FULL_UPDATE':
            # A full update replaces the list wholesale; drop the old copy first.
            self.storage.delete_hash_prefix_list(updated_list)
        for removal in update.get('removals', []):
            self.storage.remove_hash_prefix_indices(
                updated_list, removal['rawIndices']['indices'])
        for addition in update.get('additions', []):
            raw_hashes = addition['rawHashes']
            self.storage.populate_hash_prefix_list(
                updated_list,
                HashPrefixList(raw_hashes['prefixSize'],
                               b64decode(raw_hashes['rawHashes'])))
        expected_checksum = b64decode(update['checksum']['sha256'])
        if not self._verify_threat_list_checksum(updated_list, expected_checksum):
            raise Exception(
                'Local cache checksum does not match the server: "{}". Consider removing {}'
                .format(to_hex(expected_checksum), self.storage.db_path))
        log.info('Local cache checksum matches the server: {}'.format(
            to_hex(expected_checksum)))
        pending_client_state[updated_list] = update['newClientState']
    self.storage.update_threat_list_client_state(pending_client_state)
def update_hash_prefix_cache(self):
    """Update locally cached threat lists.

    First reconciles the set of stored threat lists with what the API
    advertises (restricted to ``self.platforms`` when set), deleting lists
    that are no longer offered. Then downloads incremental or full updates
    for the remaining lists and verifies each list's checksum before
    persisting its new client state.

    Raises:
        Exception: if a local list checksum disagrees with the server's.
    """
    self.api_client.fair_use_delay()
    self.storage.cleanup_full_hashes()
    # Start by assuming every stored list is stale; keep the ones the API
    # still advertises (and that match the configured platforms).
    threat_lists_to_remove = {repr(ts): ts
                              for ts, cs in self.storage.get_threat_lists()}
    for entry in self.api_client.get_threats_lists():
        threat_list = ThreatList.from_api_entry(entry)
        if self.platforms is None or threat_list.platform_type in self.platforms:
            self.storage.add_threat_list(threat_list)
            # pop() with a default replaces the previous try/del/except dance.
            threat_lists_to_remove.pop(repr(threat_list), None)
    for ts in threat_lists_to_remove.values():
        self.storage.delete_hash_prefix_list(ts)
        self.storage.delete_threat_list(ts)
    del threat_lists_to_remove

    self.api_client.fair_use_delay()
    client_state = {t.as_tuple(): s for t, s in self.storage.get_threat_lists()}
    for response in self.api_client.get_threats_update(client_state):
        response_threat_list = ThreatList(response['threatType'],
                                          response['platformType'],
                                          response['threatEntryType'])
        if response['responseType'] == 'FULL_UPDATE':
            # A full update replaces the entire list.
            self.storage.delete_hash_prefix_list(response_threat_list)
        for r in response.get('removals', []):
            self.storage.remove_hash_prefix_indices(
                response_threat_list, r['rawIndices']['indices'])
        for a in response.get('additions', []):
            hash_prefix_list = HashPrefixList(
                a['rawHashes']['prefixSize'],
                b64decode(a['rawHashes']['rawHashes']))
            self.storage.populate_hash_prefix_list(response_threat_list,
                                                   hash_prefix_list)
        expected_checksum = b64decode(response['checksum']['sha256'])
        if self._verify_threat_list_checksum(response_threat_list,
                                             expected_checksum):
            log.info('Local cache checksum matches the server: {}'.format(
                to_hex(expected_checksum)))
            self.storage.update_threat_list_client_state(
                response_threat_list, response['newClientState'])
        else:
            raise Exception(
                'Local cache checksum does not match the server: "{}". Consider removing {}'
                .format(to_hex(expected_checksum), self.storage.db_path))
def store_full_hash(self, threat_list, hash_value, cache_duration, malware_threat_type):
    """Store full hash found for the given hash prefix.

    Args:
        threat_list: ThreatList the full hash belongs to.
        hash_value: full SHA-256 hash of the URL expression (bytes).
        cache_duration: positive-cache lifetime in seconds reported by the API.
        malware_threat_type: extra threat metadata reported by the API (may be None).
    """
    # Lazy %-style arguments so formatting work is skipped when INFO is disabled
    # (matches the logging style used elsewhere in this module).
    log.info('Storing full hash %s to list %s with cache duration %s',
             to_hex(hash_value), str(threat_list), cache_duration)
    qi = '''INSERT OR IGNORE INTO full_hash
                (value, threat_type, platform_type, threat_entry_type, malware_threat_type, downloaded_at)
            VALUES
                (?, ?, ?, ?, ?, current_timestamp)
    '''
    # cache_duration is interpolated (not bound) because sqlite cannot
    # parameterize inside datetime(); int() makes the interpolation safe.
    qu = "UPDATE full_hash SET expires_at=datetime(current_timestamp, '+{} SECONDS') \
        WHERE value=? AND threat_type=? AND platform_type=? AND threat_entry_type=?"
    i_parameters = [
        sqlite3.Binary(hash_value), threat_list.threat_type,
        threat_list.platform_type, threat_list.threat_entry_type,
        malware_threat_type
    ]
    u_parameters = [
        sqlite3.Binary(hash_value), threat_list.threat_type,
        threat_list.platform_type, threat_list.threat_entry_type
    ]
    with self.get_cursor() as dbc:
        dbc.execute(qi, i_parameters)
        dbc.execute(qu.format(int(cache_duration)), u_parameters)
def main():
    """CLI driver: look up a URL against a local Safe Browsing database.

    Usage: <script> <api-key> <url>
    Prints the URL's canonical form, its permutations and digests, and the
    matching hash prefixes / blacklist lookup result.
    """
    # Guard against missing arguments instead of crashing with IndexError.
    if len(sys.argv) < 3:
        print('Usage: {} <api-key> <url>'.format(sys.argv[0]))
        sys.exit(1)
    key = sys.argv[1]
    url = sys.argv[2]
    db = "../gsb_v4.db"
    platforms = ["WINDOWS"]
    sbl = SafeBrowsingList(key, db_path=db, platforms=platforms)
    print(sbl.storage.get_threat_lists())
    u = URL(url)
    print(u.url)
    print(u.canonical)
    for permutation in u.url_permutations(u.canonical):
        print(permutation)
        print(u.digest(permutation))
    full_hashes = list(u.hashes)
    print(full_hashes)
    cues = [to_hex(fh[0:4]) for fh in full_hashes]
    print(cues)
    print(sbl.storage.lookup_hash_prefix(cues))
    bl = sbl.lookup_url(url)
    print(bl)
def _lookup_hashes(self, full_hashes):
    """Lookup URL hash in blacklists

    Args:
        full_hashes: iterable of full SHA-256 URL hashes (bytes).

    Returns:
        Names of the threat lists the URL hash was found in (empty if clear).
    """
    full_hashes = list(full_hashes)
    cues = [to_hex(fh[0:4]) for fh in full_hashes]
    result = []
    try:
        matching_prefixes = {}
        matching_full_hashes = set()
        is_potential_threat = False
        # First lookup hash prefixes which match full URL hash
        for (threat_list, hash_prefix, negative_cache_expired
             ) in self.storage.lookup_hash_prefix(cues):
            for full_hash in full_hashes:
                if full_hash.startswith(hash_prefix):
                    is_potential_threat = True
                    # consider hash prefix negative cache as expired if it is expired in at least one threat list
                    matching_prefixes[hash_prefix] = matching_prefixes.get(
                        hash_prefix, False) or negative_cache_expired
                    matching_full_hashes.add(full_hash)
        # if none matches, URL hash is clear
        if not is_potential_threat:
            return []
        # if there is non-expired full hash, URL is blacklisted
        matching_expired_threat_lists = set()
        for threat_list, has_expired in self.storage.lookup_full_hashes(
                matching_full_hashes):
            if has_expired:
                matching_expired_threat_lists.add(threat_list)
            else:
                result.append(threat_list)
        if result:
            return result
        # If there are no matching expired full hash entries
        # and negative cache is still current for all prefixes, consider it safe.
        # BUG FIX: the negative-cache hit requires the sum of expired flags to be
        # ZERO; the previous code omitted `== 0`, so an expired negative cache was
        # treated as a hit and the required full-hash sync was skipped.
        if len(matching_expired_threat_lists) == 0 and sum(
                map(int, matching_prefixes.values())) == 0:
            log.info('Negative cache hit.')
            return []
        # Now we can assume that there are expired matching full hash entries and/or
        # cache prefix entries with expired negative cache. Both require full hash sync.
        self._sync_full_hashes(matching_prefixes.keys())
        # Now repeat full hash lookup
        for threat_list, has_expired in self.storage.lookup_full_hashes(
                matching_full_hashes):
            if not has_expired:
                result.append(threat_list)
    except:
        # Bare except is deliberate: roll back the DB transaction on ANY
        # failure (including KeyboardInterrupt) before re-raising.
        self.storage.db.rollback()
        raise
    return result
def populate_hash_prefix_list(self, threat_list, hash_prefix_list):
    """Bulk-insert every prefix of hash_prefix_list for the given threat list."""
    log.info('Storing {} entries of hash prefix list {}'.format(
        len(hash_prefix_list), str(threat_list)))
    q = '''INSERT INTO hash_prefix
                (value, cue, threat_type, platform_type, threat_entry_type, timestamp)
            VALUES
                (?, ?, ?, ?, ?, current_timestamp)
    '''
    with self.get_cursor() as dbc:
        # Feed executemany lazily; the cue column is the 4-byte hex prefix.
        dbc.executemany(q, (
            (sqlite3.Binary(prefix),
             to_hex(prefix[0:4]),
             threat_list.threat_type,
             threat_list.platform_type,
             threat_list.threat_entry_type)
            for prefix in hash_prefix_list))
def _sync_hash_prefix_cache(self):
    """Download and apply threat-list updates from the server.

    Each list's update is applied and checksum-verified; its new client
    state is then persisted and committed before the next list is handled.

    Raises:
        Exception: if a local list checksum disagrees with the server's.
    """
    self.api_client.fair_use_delay()
    known_state = self.storage.get_client_state()
    for update in self.api_client.get_threats_update(known_state):
        updated_list = ThreatList(update['threatType'],
                                  update['platformType'],
                                  update['threatEntryType'])
        if update['responseType'] == 'FULL_UPDATE':
            # A full update replaces the list wholesale.
            self.storage.delete_hash_prefix_list(updated_list)
        for removal in update.get('removals', []):
            self.storage.remove_hash_prefix_indices(
                updated_list, removal['rawIndices']['indices'])
        for addition in update.get('additions', []):
            raw_hashes = addition['rawHashes']
            self.storage.populate_hash_prefix_list(
                updated_list,
                HashPrefixList(raw_hashes['prefixSize'],
                               b64decode(raw_hashes['rawHashes'])))
        expected_checksum = b64decode(update['checksum']['sha256'])
        log.info('Verifying threat hash prefix list checksum')
        if not self._verify_threat_list_checksum(updated_list, expected_checksum):
            raise Exception('Local cache checksum does not match the server: '
                            '"{}". Consider removing {}'.format(to_hex(expected_checksum),
                                                                self.storage.db_path))
        log.info('Local cache checksum matches the server: {}'.format(to_hex(expected_checksum)))
        self.storage.update_threat_list_client_state(updated_list, update['newClientState'])
        self.storage.commit()
def _lookup_hashes(self, full_hashes):
    """Lookup URL hash in blacklists

    Returns names of lists it was found in.

    Args:
        full_hashes: iterable of full SHA-256 URL hashes (bytes).

    Returns:
        List of threat lists with a current (non-expired) full-hash match;
        empty list when the URL is considered clear.
    """
    full_hashes = list(full_hashes)
    # 4-byte hex cues used to index the locally stored hash prefixes.
    cues = [to_hex(fh[0:4]) for fh in full_hashes]
    result = []
    try:
        matching_prefixes = {}
        matching_full_hashes = set()
        is_potential_threat = False
        # First lookup hash prefixes which match full URL hash
        for (threat_list, hash_prefix, negative_cache_expired) in self.storage.lookup_hash_prefix(cues):
            for full_hash in full_hashes:
                if full_hash.startswith(hash_prefix):
                    is_potential_threat = True
                    # consider hash prefix negative cache as expired if it is expired in at least one threat list
                    matching_prefixes[hash_prefix] = matching_prefixes.get(hash_prefix, False) or negative_cache_expired
                    matching_full_hashes.add(full_hash)
        # if none matches, URL hash is clear
        if not is_potential_threat:
            return []
        # if there is non-expired full hash, URL is blacklisted
        matching_expired_threat_lists = set()
        for threat_list, has_expired in self.storage.lookup_full_hashes(matching_full_hashes):
            if has_expired:
                matching_expired_threat_lists.add(threat_list)
            else:
                result.append(threat_list)
        if result:
            return result
        # If there are no matching expired full hash entries
        # and negative cache is still current for all prefixes, consider it safe
        if len(matching_expired_threat_lists) == 0 and sum(map(int, matching_prefixes.values())) == 0:
            log.info('Negative cache hit.')
            return []
        # Now we can assume that there are expired matching full hash entries and/or
        # cache prefix entries with expired negative cache. Both require full hash sync.
        self._sync_full_hashes(matching_prefixes.keys())
        # Now repeat full hash lookup
        for threat_list, has_expired in self.storage.lookup_full_hashes(matching_full_hashes):
            if not has_expired:
                result.append(threat_list)
    except:
        # Bare except on purpose: roll back the DB transaction on ANY failure
        # before re-raising to the caller.
        self.storage.db.rollback()
        raise
    return result
def store_full_hash(self, threat_list, hash_value, cache_duration, malware_threat_type):
    """Store full hash found for the given hash prefix"""
    log.info('Storing full hash %s to list %s with cache duration %s',
             to_hex(hash_value), str(threat_list), cache_duration)
    insert_query = '''INSERT OR IGNORE INTO full_hash
                (value, threat_type, platform_type, threat_entry_type, malware_threat_type, downloaded_at)
            VALUES
                (?, ?, ?, ?, ?, current_timestamp)
    '''
    # cache_duration goes through int() before being interpolated, so the
    # format() below cannot inject into the SQL text.
    update_query = ("UPDATE full_hash SET expires_at=datetime(current_timestamp, '+{} SECONDS') "
                    "WHERE value=? AND threat_type=? AND platform_type=? AND threat_entry_type=?")
    key_columns = [
        sqlite3.Binary(hash_value),
        threat_list.threat_type,
        threat_list.platform_type,
        threat_list.threat_entry_type,
    ]
    with self.get_cursor() as dbc:
        dbc.execute(insert_query, key_columns + [malware_threat_type])
        dbc.execute(update_query.format(int(cache_duration)), key_columns)
def update_hash_prefix_cache(self):
    """Update locally cached threat lists.

    Reconciles stored threat lists against the API (dropping lists no
    longer offered or outside the configured platforms), then fetches
    and checksum-verifies updates for the lists that remain.
    """
    self.api_client.fair_use_delay()
    self.storage.cleanup_full_hashes()
    # Everything stored is presumed stale until the API re-advertises it.
    stale_lists = {repr(ts): ts for ts, cs in self.storage.get_threat_lists()}
    for entry in self.api_client.get_threats_lists():
        advertised = ThreatList.from_api_entry(entry)
        if self.platforms is None or advertised.platform_type in self.platforms:
            self.storage.add_threat_list(advertised)
            stale_lists.pop(repr(advertised), None)
    for stale in stale_lists.values():
        self.storage.delete_hash_prefix_list(stale)
        self.storage.delete_threat_list(stale)
    del stale_lists

    self.api_client.fair_use_delay()
    client_state = {tl.as_tuple(): state
                    for tl, state in self.storage.get_threat_lists()}
    for update in self.api_client.get_threats_update(client_state):
        updated_list = ThreatList(update['threatType'],
                                  update['platformType'],
                                  update['threatEntryType'])
        if update['responseType'] == 'FULL_UPDATE':
            # A full update replaces the list wholesale.
            self.storage.delete_hash_prefix_list(updated_list)
        for removal in update.get('removals', []):
            self.storage.remove_hash_prefix_indices(
                updated_list, removal['rawIndices']['indices'])
        for addition in update.get('additions', []):
            raw_hashes = addition['rawHashes']
            self.storage.populate_hash_prefix_list(
                updated_list,
                HashPrefixList(raw_hashes['prefixSize'],
                               b64decode(raw_hashes['rawHashes'])))
        expected_checksum = b64decode(update['checksum']['sha256'])
        if not self._verify_threat_list_checksum(updated_list, expected_checksum):
            raise Exception('Local cache checksum does not match the server: "{}". Consider removing {}'.format(to_hex(expected_checksum), self.storage.db_path))
        log.info('Local cache checksum matches the server: {}'.format(to_hex(expected_checksum)))
        self.storage.update_threat_list_client_state(updated_list, update['newClientState'])
def lookup_db():
    """Flask view: look up a URL against the local Safe Browsing database.

    Reads the API key and database path from the config file, canonicalizes
    the requested URL, and reports which stored hash prefixes match its
    permutation hashes.

    Returns:
        JSON response with the query, canonical URL, permutations and
        matching prefixes; or a 400/500 JSON error on bad input / missing key.
    """
    from gglsbl import SafeBrowsingList
    from gglsbl.protocol import URL
    from gglsbl.utils import to_hex

    res = {}
    rdict = {'status': '', 'message': ''}

    key = ''
    db = '../gsb_v4.db'
    platforms = ['WINDOWS']
    if os.path.isfile(config):
        cp = ConfigParser()
        cp.read(config)
        if 'api' in cp and 'key' in cp['api']:
            key = cp['api']['key']
        if 'database' in cp and 'localdb' in cp['database']:
            db = cp['database']['localdb']
    if not key:
        logging.error('API key not found.')
        rdict['status'] = 500
        rdict['message'] = 'Internal Server Error'

    url = ''
    if request.method == 'GET':
        url = request.args.get('url')
        if not url:
            rdict['status'] = 400
            rdict['message'] = "The parameter 'url' is missing"

    if not rdict['status']:
        sbl = SafeBrowsingList(key, db_path=db, platforms=platforms)
        logging.debug(sbl.storage.get_threat_lists())
        u = URL(url)
        res = {
            'query': u.url,
            'canonical': u.canonical,
            'permutations': [],
        }
        for pattern in u.url_permutations(u.canonical):
            res['permutations'].append({
                'pattern': pattern,
                'sha256': to_hex(u.digest(pattern)),
            })
        full_hashes = list(u.hashes)
        cues = [to_hex(fh[0:4]) for fh in full_hashes]
        res['results'] = []
        for match in sbl.storage.lookup_hash_prefix(cues):
            prefix = to_hex(match[1])
            for p in res['permutations']:
                # Plain prefix comparison. re.match() was the wrong tool here:
                # it interprets the prefix as a regex pattern.
                if p['sha256'].startswith(prefix):
                    res['results'].append({
                        'pattern': p['pattern'],
                        'prefix': prefix,
                        'matched': str(match[0]),
                    })
        logging.info(res)
        res = jsonify(res)

    # res is still the (falsy) empty dict when the lookup was skipped above.
    if not res:
        if not rdict["status"]:
            rdict["status"] = 400
            rdict["message"] = "Invalid request."
        res = jsonify(rdict)
        res.status_code = rdict["status"]
    return res