def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Return the inverse of ``C`` together with the inverse's square root.

    Despite the function name, the work is done with an eigendecomposition
    (``eigh``), not an SVD.

    Args:
        C: matrix to invert; it is passed straight to ``eigh``.
        mineig: accepted for interface compatibility; this implementation
            never reads it.
        hashtable: optional cache keyed by the xxh64 digest of ``C`` and
            holding ``(Cinv, Cinv_sqrt)`` tuples.  After an insert, entries
            are dropped with ``popitem(last=False)`` until the table is no
            larger than the module-level ``max_table_size``.

    Returns:
        Tuple ``(Cinv, Cinv_sqrt)``.
    """
    caching = hashtable is not None
    digest = None
    if caching:
        # Fortran-ordered arrays cannot be hashed directly, so hash a
        # C-ordered copy instead.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        digest = xxhash.xxh64_digest(C)
        if digest in hashtable:
            return hashtable[digest]

    # Cholesky proved too unstable for this problem, hence the
    # eigendecomposition.
    eigvals, eigvecs = eigh(C)
    scaled = eigvecs @ s.diag(1 / s.sqrt(eigvals))
    Cinv_sqrt = scaled @ eigvecs.T
    Cinv = scaled @ scaled.T

    if caching:
        # Store the fresh result, then trim the oldest entries so the cache
        # stays bounded.
        hashtable[digest] = (Cinv, Cinv_sqrt)
        while len(hashtable) > max_table_size:
            hashtable.popitem(last=False)

    return Cinv, Cinv_sqrt
def pack(self, data):
    """pack(data:object) -> str

    Generate an encrypted token from the marshalable data.

    The payload is ``self.magic_bytes`` followed by ``marshal.dumps(data)``.
    It is encrypted twice: first with ARC4 under a key derived from an
    8-byte random salt and ``self.rc4_key``, then with AES-CBC under a key
    derived from a 16-byte random salt and ``self.aes_key``.  The token is
    the base64 text of ``aes_salt + iv + ciphertext``.

    The token is also stored in ``self.cache`` (wrapped in ``Holder``).

    Raises:
        ValueError: if the base64 token is longer than ``self.max_token``.
    """
    aes = Crypto.Cipher.AES
    payload = self.magic_bytes + marshal.dumps(data)

    # Inner layer: ARC4, keyed by xxh64(salt + rc4_key); the salt travels
    # in front of the ciphertext.
    # NOTE(review): xxh64 is a non-cryptographic hash and ARC4 is a legacy
    # cipher; changing either would alter the token format.
    inner_salt = Crypto.Random.get_random_bytes(8)
    inner_key = xxhash.xxh64_digest(inner_salt + self.rc4_key)
    inner_ct = Crypto.Cipher.ARC4.new(inner_key).encrypt(payload)
    inner = inner_salt + inner_ct

    # Outer layer: AES-CBC, keyed by md5(salt + aes_key); no IV is passed,
    # so the one exposed by the cipher object is written into the token.
    # NOTE(review): a single MD5 pass is a weak key derivation and nothing
    # here authenticates the ciphertext (no MAC) - confirm this is accepted.
    outer_salt = Crypto.Random.get_random_bytes(16)
    outer_key = hashlib.md5(outer_salt + self.aes_key).digest()
    outer_cipher = aes.new(outer_key, aes.MODE_CBC)
    padded = Crypto.Util.Padding.pad(inner, aes.block_size)
    outer_ct = outer_cipher.encrypt(padded)
    blob = outer_salt + outer_cipher.iv + outer_ct

    token = base64.b64encode(blob).decode('utf-8')
    if len(token) > self.max_token:
        raise ValueError('the generated token is too large')

    self.cache[token] = Holder(data)
    return token
def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Fast stable inverse using SVD, also returning the inverse's square root.

    Singular values below ``mineig`` are treated as zero in the inverse,
    which is what lets this handle near-singular matrices.

    Args:
        C: matrix to invert.
        mineig: singular values strictly smaller than this contribute
            nothing to the result (their reciprocal is replaced by 0).
        hashtable: optional cache keyed by the xxh64 digest of ``C`` and
            holding ``(Cinv, Cinv_sqrt)`` tuples.  After an insert, entries
            are dropped with ``popitem(last=False)`` until the table is no
            larger than the module-level ``max_table_size``.

    Returns:
        Tuple ``(Cinv, Cinv_sqrt)``.
    """
    # If we have a hash table, look for the precalculated solution.
    h = None
    if hashtable is not None:
        # Arrays in Fortran ordering are not hashable, so hash a C-ordered
        # copy; without this a non-contiguous input breaks the cache lookup.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    U, V, D = svd(C)

    # Reciprocal singular values, with the ones below the threshold zeroed.
    Vi = 1.0 / V
    Vi[V < mineig] = 0
    Visqrt = s.sqrt(Vi)

    Cinv = (D.T).dot(s.diag(Vi)).dot(U.T)
    Cinv_sqrt = (D.T).dot(s.diag(Visqrt)).dot(U.T)

    # If there is a hash table, cache our solution.  Bound the total cache
    # size by removing any extra items in FIFO order.
    if hashtable is not None:
        hashtable[h] = (Cinv, Cinv_sqrt)
        while len(hashtable) > max_table_size:
            hashtable.popitem(last=False)

    return Cinv, Cinv_sqrt
def get_indices_and_finger(self, string_item):
    """Return ``(index_1, index_2, int_finger)`` for ``string_item``.

    ``int_finger`` is the xxh64 digest of the item read as a big-endian
    integer.  ``index_1`` is that integer modulo ``self._filter_capacity``;
    ``index_2`` is ``index_1`` XOR-ed with the index that
    ``obtain_index_from_hash`` yields for the digest, again reduced modulo
    the capacity.
    """
    digest = xxh64_digest(string_item)

    # The fingerprint and the first index come from the same integer, so
    # decode the digest only once.
    int_finger = int.from_bytes(digest, byteorder="big")
    index_1 = int_finger % self._filter_capacity

    partner = self.obtain_index_from_hash(digest)
    index_2 = (index_1 ^ partner) % self._filter_capacity

    return index_1, index_2, int_finger
def svd_inv_sqrt(C: np.array, hashtable: OrderedDict = None,
                 max_hash_size: int = None) -> (np.array, np.array):
    """Matrix inversion, based on decomposition.  Built to be stable, and
    positive.

    The inverse is built from an eigendecomposition.  If the eigenvalues
    contain negative or NaN entries, the decomposition is retried with an
    increasing positive value (1e-7, 1e-6, 1e-5) added to the diagonal.

    Args:
        C: matrix to invert
        hashtable: if used, the hashtable to store/retrieve results in/from
        max_hash_size: maximum size of hashtable.  Results are only stored
            when this is not None; lookups happen whenever ``hashtable`` is
            given.

    Return:
        (np.array, np.array): inverse of C and square root of the inverse of C

    Raises:
        ValueError: if the eigenvalues are still negative or NaN after the
            largest diagonal adjustment.
    """

    def _unusable(eigvals):
        # Negative or NaN eigenvalues cannot be square-rooted meaningfully.
        return np.any(eigvals < 0) or np.any(np.isnan(eigvals))

    # If we have a hash table, look for the precalculated solution.
    h = None
    if hashtable is not None:
        # If arrays are in Fortran ordering, they are not hashable.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    D, P = scipy.linalg.eigh(C)
    inv_eps = 0.0
    for count in range(3):
        if not _unusable(D):
            break
        # The previous formula, 1e-6 * (count - 1) * 10, produced -1e-5 and
        # 0 on the first two retries and raised right after the third, so
        # regularisation could never succeed.  Use a strictly positive,
        # growing jitter that tops out at the same 1e-5 magnitude.
        inv_eps = 1e-6 * 10.0 ** (count - 1)
        D, P = scipy.linalg.eigh(C + np.diag(np.ones(C.shape[0]) * inv_eps))
    else:
        # All three adjustments were used; only fail if the last one did
        # not help either.
        if _unusable(D):
            raise ValueError(
                'Matrix inversion contains negative values,' +
                'even after adding {} to the diagonal.'.format(inv_eps))

    Ds = np.diag(1 / np.sqrt(D))
    L = P @ Ds
    Cinv_sqrt = L @ P.T
    Cinv = L @ L.T

    # If there is a hash table, cache our solution.  Bound the total cache
    # size by removing any extra items in FIFO order.
    if (hashtable is not None) and (max_hash_size is not None):
        hashtable[h] = (Cinv, Cinv_sqrt)
        while len(hashtable) > max_hash_size:
            hashtable.popitem(last=False)

    return Cinv, Cinv_sqrt
def get_or_create(tr, allocator, value):
    """Return the uid registered for ``value``, allocating one if needed.

    The value is tuple-packed and hashed with xxh64.  If the
    ``(MAGIC, HASH_TO_UID, hash)`` key already holds a uid, that uid is
    returned.  Otherwise a uid is obtained from ``allocator.allocate(tr)``
    and three keys are written in the transaction: hash -> uid,
    uid -> hash and uid -> packed value.

    Args:
        tr: transaction object providing ``get`` and ``set``.
        allocator: object whose ``allocate(tr)`` returns a fresh uid.
        value: the value to register; it must be tuple-packable.

    Returns:
        The existing or newly allocated uid.
    """
    packed_value = fdb.tuple.pack((value,))
    digest = xxhash.xxh64_digest(packed_value)
    hash_key = fdb.tuple.pack((MAGIC, HASH_TO_UID, digest))

    uid = tr.get(hash_key)
    # NOTE(review): the `!= None` comparison is kept verbatim; presumably
    # `tr.get` returns a lazy value object whose comparison operators
    # resolve it, in which case an identity test would not be equivalent.
    if uid != None:  # noqa: E711
        return uid

    # Unknown value: allocate a uid and record all three mappings.
    uid = allocator.allocate(tr)
    tr.set(hash_key, uid)
    tr.set(fdb.tuple.pack((MAGIC, UID_TO_HASH, uid)), digest)
    tr.set(fdb.tuple.pack((MAGIC, UID_TO_VALUE, uid)), packed_value)
    return uid
def seed(self, value=None) -> None:
    """Re-initialize the random generator with a new seed.

    Resets the sequence to its first value.  ``None`` is treated as ``0``.

    The new seed is validated and hashed before any attribute is touched,
    so a rejected value leaves ``_seed`` and ``_hash_input`` as they were.

    Caution:
        This method cannot be called from within :meth:`cascade`, and
        will raise a :class:`RuntimeError` if attempted.

    Args:
        value (int, str, bytes, bytearray): The value to seed with.  An
            int is converted to at least 8 big-endian bytes; a str is
            encoded with ``str.encode()``.  The resulting bytes are hashed
            with seed 0.

    Raises:
        ValueError: If the seed value is not a supported type.
        OverflowError: If ``value`` is a negative int (it cannot be
            converted to unsigned bytes).
    """
    if value is None:
        value = 0

    if isinstance(value, int):
        # Convert int to at least 8 bytes; larger ints get as many bytes
        # as their bit length requires.
        num_bytes = max(8, (value.bit_length() + 7) // 8)
        payload = value.to_bytes(num_bytes, 'big')
    elif isinstance(value, (str, bytes, bytearray)):
        payload = value.encode() if isinstance(value, str) else value
    else:
        raise ValueError('Seed must be an int, str, bytes, or bytearray.')

    # Hash the input with seed 0 to find the sequence's hash input.
    hash_input = xxhash.xxh64_digest(payload, seed=0)

    # Commit only once everything above has succeeded.  `_seed` keeps the
    # caller's value as given (a str stays a str).
    self._seed = value
    self._hash_input = hash_input
    self.reset()
def unpack(self, token):
    """unpack(token:str) -> object

    Validate the token and return the data contained in the encrypted
    token string.

    A token already present in ``self.cache`` is answered from the cache.
    Otherwise the token is base64-decoded and split into a 16-byte salt,
    an AES-block-sized IV and the ciphertext.  The AES-CBC layer is
    removed and unpadded, then the ARC4 layer is removed, and the result
    must start with ``self.magic_bytes``.  The remainder is loaded with
    ``marshal`` and cached.

    Raises:
        ValueError: if the token is longer than ``self.max_token``.
        VerificationError: if the decoded token has the wrong layout or
            the magic bytes do not match.
    """
    if len(token) > self.max_token:
        raise ValueError('token too large')

    cached = self.cache.get(token)
    if cached is not None:
        return cached.data

    aes = Crypto.Cipher.AES
    block_size = aes.block_size
    blob = base64.b64decode(token)

    # Layout: 16-byte salt | IV (one AES block) | ciphertext.
    iv_end = 16 + block_size
    outer_salt = blob[:16]
    iv = blob[16:iv_end]
    outer_ct = blob[iv_end:]
    layout_ok = (
        len(outer_salt) == 16
        and len(iv) == block_size
        and len(outer_ct) % block_size == 0
    )
    if not layout_ok:
        raise VerificationError('Invalid token')

    # Outer layer: AES-CBC keyed by md5(salt + aes_key).
    outer_key = hashlib.md5(outer_salt + self.aes_key).digest()
    outer_cipher = aes.new(outer_key, aes.MODE_CBC, iv)
    inner = Crypto.Util.Padding.unpad(
        outer_cipher.decrypt(outer_ct), aes.block_size
    )

    # Inner layer: ARC4 keyed by xxh64(salt + rc4_key).  The stream cipher
    # is symmetric, so `encrypt` is what undoes it here.
    inner_salt = inner[:8]
    inner_ct = inner[8:]
    inner_key = xxhash.xxh64_digest(inner_salt + self.rc4_key)
    payload = Crypto.Cipher.ARC4.new(inner_key).encrypt(inner_ct)

    magic_len = self.magic_bytes_len
    if payload[:magic_len] != self.magic_bytes:
        raise VerificationError('Invalid token')

    # NOTE(review): the magic-bytes prefix is the only integrity check
    # visible here (no MAC) before `marshal.loads` runs on token-derived
    # bytes - confirm tokens cannot be attacker-controlled.
    data = marshal.loads(payload[magic_len:])
    self.cache[token] = Holder(data)
    return data
def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Fast stable inverse using SVD, also returning the inverse's square root.

    Singular values below ``mineig`` are treated as zero in the inverse,
    which is what lets this handle near-singular matrices.

    Args:
        C: matrix to invert.
        mineig: singular values strictly smaller than this contribute
            nothing to the result (their reciprocal is replaced by 0).
        hashtable: optional mapping used as a cache, keyed by the xxh64
            digest of ``C`` and holding ``(Cinv, Cinv_sqrt)`` tuples.  This
            function never removes entries from it.

    Returns:
        Tuple ``(Cinv, Cinv_sqrt)``.
    """
    h = None
    if hashtable is not None:
        # Arrays that are not C-contiguous (e.g. Fortran-ordered) cannot be
        # hashed directly, so hash a C-ordered copy instead.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    U, V, D = svd(C)

    # Reciprocal singular values, with the ones below the threshold zeroed.
    Vi = 1.0 / V
    Vi[V < mineig] = 0
    Visqrt = s.sqrt(Vi)

    Cinv = (D.T).dot(s.diag(Vi)).dot(U.T)
    Cinv_sqrt = (D.T).dot(s.diag(Visqrt)).dot(U.T)

    if hashtable is not None:
        hashtable[h] = (Cinv, Cinv_sqrt)

    return Cinv, Cinv_sqrt
def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Stable matrix inverse, also returning the inverse's square root.

    Despite the name, the inverse is built from an eigendecomposition.  If
    the eigenvalues contain negative or NaN entries, the decomposition is
    retried with an increasing positive value (1e-7, 1e-6, 1e-5) added to
    the diagonal.

    Args:
        C: matrix to invert.
        mineig: accepted for interface compatibility; this implementation
            never reads it.
        hashtable: optional cache keyed by the xxh64 digest of ``C`` and
            holding ``(Cinv, Cinv_sqrt)`` tuples.  After an insert, entries
            are dropped with ``popitem(last=False)`` until the table is no
            larger than the module-level ``max_table_size``.

    Returns:
        Tuple ``(Cinv, Cinv_sqrt)``.

    Raises:
        ValueError: if the eigenvalues are still negative or NaN after the
            largest diagonal adjustment.
    """

    def _unusable(eigvals):
        # Negative or NaN eigenvalues cannot be square-rooted meaningfully.
        return np.any(eigvals < 0) or np.any(np.isnan(eigvals))

    # If we have a hash table, look for the precalculated solution.
    h = None
    if hashtable is not None:
        # If arrays are in Fortran ordering, they are not hashable.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    D, P = scipy.linalg.eigh(C)
    inv_eps = 0.0
    for count in range(3):
        if not _unusable(D):
            break
        # The previous formula, 1e-6 * (count - 1) * 10, produced -1e-5 and
        # 0 on the first two retries and raised right after the third, so
        # regularisation could never succeed.  Use a strictly positive,
        # growing jitter that tops out at the same 1e-5 magnitude.
        inv_eps = 1e-6 * 10.0 ** (count - 1)
        D, P = scipy.linalg.eigh(C + np.diag(np.ones(C.shape[0]) * inv_eps))
    else:
        # All three adjustments were used; only fail if the last one did
        # not help either.
        if _unusable(D):
            raise ValueError(
                'Matrix inversion contains negative values,' +
                'even after adding {} to the diagonal.'.format(inv_eps))

    Ds = np.diag(1 / np.sqrt(D))
    L = P @ Ds
    Cinv_sqrt = L @ P.T
    Cinv = L @ L.T

    # If there is a hash table, cache our solution.  Bound the total cache
    # size by removing any extra items in FIFO order.
    if hashtable is not None:
        hashtable[h] = (Cinv, Cinv_sqrt)
        while len(hashtable) > max_table_size:
            hashtable.popitem(last=False)

    return Cinv, Cinv_sqrt
def test_xxh64_overflow(self):
    """Check that seeds differing by a multiple of 2**64 are equivalent.

    For each seed pair, the two hasher objects must agree on ``seed`` and
    on every digest form, and the one-shot ``xxh64_*digest`` functions
    must agree with the first hasher under either seed.
    """
    text = 'I want an unsigned 64-bit seed!'
    seed_pairs = (
        (0, 2**64),
        (1, 2**64 + 1),
        (2**65 - 1, 2**66 - 1),
    )

    for first_seed, second_seed in seed_pairs:
        a = xxhash.xxh64(text, seed=first_seed)
        b = xxhash.xxh64(text, seed=second_seed)

        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())

        # Each hasher method paired with its one-shot counterpart.
        counterparts = (
            (a.intdigest, xxhash.xxh64_intdigest),
            (a.digest, xxhash.xxh64_digest),
            (a.hexdigest, xxhash.xxh64_hexdigest),
        )
        for method, one_shot in counterparts:
            for seed in (first_seed, second_seed):
                self.assertEqual(method(), one_shot(text, seed=seed))
def from_file(cls: Type[DB], path: Union[str, PathLike], create_new=False) -> DB:
    """Load a Database from a path.

    The file suffix selects how the bytes are read: ``.gz`` goes through
    ``gzip``, ``.zst`` through ``zstd`` (with a content-checksum
    comparison), and anything else is read as-is.  The bytes are then
    parsed with ``orjson`` and handed to ``cls.from_dict``.

    Args:
        path: location of the database file.
        create_new: when true and the path does not exist, log a warning
            and return an empty ``cls()`` instead of reading.

    Raises:
        DatabaseException: if a ``.zst`` frame declares a checksum that
            does not match the decompressed content.
    """
    path = Path(path)
    if not path.exists() and create_new:
        logging.getLogger(__name__).warning(
            "Database file does not exist. Starting with blank database."
        )
        return cls()

    suffix = path.suffix
    if suffix == ".gz":
        with gzip.open(path, "rb") as f:
            raw = f.read()
    elif suffix == ".zst":
        with open(path, "rb") as f:
            compressed = f.read()
        has_checksum = zstd.get_frame_parameters(compressed).has_checksum
        checksum = compressed[-4:]
        raw = zstd.decompress(compressed)
        # Drop the compressed copy before hashing to keep peak memory down.
        del compressed
        s_hash = xxhash.xxh64_digest(raw)
        # The frame's trailing four bytes are compared with the last four
        # digest bytes in reversed byte order.
        if has_checksum and checksum != s_hash[-4:][::-1]:
            raise DatabaseException(
                f"zstd content checksum verification failed: "
                f"{checksum.hex()} != {s_hash.hex()}"
            )
    else:
        with open(path, "rb") as f:
            raw = f.read()

    parsed = orjson.loads(raw)
    del raw
    return cls.from_dict(parsed)
def test_xxh64_update(self):
    """Check that incremental ``update`` calls match one-shot hashing.

    The characters 'a', 'b', 'c' are fed one at a time; after each one the
    running digest must equal both ``xxh64(prefix).digest()`` and
    ``xxh64_digest(prefix)``.  The sequence is run once with the default
    seed and once with a random seed.
    """
    hasher = xxhash.xxh64()
    fed = ''
    for ch in 'abc':
        hasher.update(ch)
        fed += ch
        self.assertEqual(xxhash.xxh64(fed).digest(), hasher.digest())
        self.assertEqual(xxhash.xxh64_digest(fed), hasher.digest())

    seed = random.randint(0, 2**64)
    hasher = xxhash.xxh64(seed=seed)
    fed = ''
    for ch in 'abc':
        hasher.update(ch)
        fed += ch
        self.assertEqual(xxhash.xxh64(fed, seed).digest(), hasher.digest())
        self.assertEqual(xxhash.xxh64_digest(fed, seed), hasher.digest())
def obtain_index_from_hash(self, string_item):
    """Map ``string_item`` to an index below ``self._filter_capacity``.

    The item is hashed with xxh64, the digest is read as a big-endian
    integer, and that integer is reduced modulo the filter capacity.
    """
    digest = xxh64_digest(string_item)
    as_int = int.from_bytes(digest, byteorder="big")
    return as_int % self._filter_capacity
def make_key_u(name):
    """Build a key from a text name.

    The key is the first 8 bytes of the encoded name followed by the
    xxh64 digest of the name.
    """
    prefix = name.encode()[:8]
    digest = xxh64_digest(name)
    return prefix + digest
def make_key(name):
    """Build a key from ``name``.

    The key is the first 8 items of ``name`` followed by the xxh64 digest
    of ``name``.  No encoding is applied, so ``name`` presumably has to be
    a bytes-like value for the concatenation to work - verify against the
    callers.
    """
    prefix = name[:8]
    digest = xxh64_digest(name)
    return prefix + digest