def compile_hs(self, hs_db_file=None):
    """Populate ``self.hs_db`` with a Hyperscan database.

    If ``hs_db_file`` names an existing file, the database is deserialized
    from it and no compilation happens. Otherwise every entry of
    ``self.regexes`` is compiled into a fresh database and, when
    ``hs_db_file`` was supplied, the compiled database is serialized to
    that path so a later call can load it instead of recompiling.

    Args:
        hs_db_file: Optional path of the serialized database used as an
            on-disk cache. ``None`` (or any falsy value) disables both
            loading and saving.

    Side effects:
        Sets ``self.hs_db``. On the compile path also sets
        ``self.stats.hs_compilation_time`` to the compile duration in
        seconds; the load path leaves it untouched.
    """
    if hs_db_file and os.path.isfile(hs_db_file):
        log.info(f'Loading Hyperscan DB from disk: {hs_db_file}')
        with open(hs_db_file, 'rb') as f:
            self.hs_db = hyperscan.loads(bytearray(f.read()))
        return

    self.hs_db = hyperscan.Database()

    # Resolve the column positions once from the Regex field list instead
    # of rebuilding an itemgetter for every projection.
    fields = self.Regex._fields
    pattern_idx = fields.index('pattern')
    flags_idx = fields.index('hs_flags')

    # Patterns are handed to the compiler as UTF-8 bytes.
    expressions = [regex[pattern_idx].encode('utf-8') for regex in self.regexes]
    flags = [regex[flags_idx] for regex in self.regexes]

    # Each pattern's id is its position in self.regexes.
    num_patterns = len(self.regexes)
    ids = list(range(num_patterns))

    # perf_counter() is monotonic, so the measured duration cannot be
    # skewed (or go negative) if the system clock is adjusted mid-compile.
    start = time.perf_counter()
    self.hs_db.compile(
        expressions=expressions,
        ids=ids,
        elements=num_patterns,
        flags=flags,
    )
    self.stats.hs_compilation_time = time.perf_counter() - start

    if hs_db_file:
        log.info(f'Saving Hyperscan DB to disk: {hs_db_file}')
        with open(hs_db_file, 'wb') as f:
            f.write(hyperscan.dumps(self.hs_db))
def _load_hs_db(path: str):
    """Deserialize a database from the binary file at ``path``.

    When ``path`` is not an existing regular file, an error entry is
    logged and ``None`` is returned. Otherwise the file is read in binary
    mode, a "hyperscan database loaded" event is logged, and the bytes are
    passed (as a ``bytearray``) to ``hs.loads``, whose result is returned.
    """
    if not os.path.isfile(path):
        failure = Logger().error(message=f"File at path: {path} does not exists")
        failure.out(severity=Severity.ERROR)
        return None

    with io.open(path, "rb") as db_file:
        raw_bytes = db_file.read()

    loaded_event = Logger().event(
        category="hyperscan",
        action="hyperscan database loaded",
        dataset=path,
    )
    loaded_event.out(severity=Severity.INFO)

    return hs.loads(bytearray(raw_bytes))
def test_database_deserialize(database_stream):
    """Round-trip a database through dumps/loads and expect a new object."""
    blob = bytearray(hyperscan.dumps(database_stream))
    restored = hyperscan.loads(blob)
    # Deserialization must build a distinct object, not hand back the input.
    assert restored is not database_stream