def _get_bucket(self, key):
    """
    Selects the bucket that a key string belongs to.

    The key is hashed with shash and the hash is reduced modulo the number of buckets to obtain an index into
    self.buckets.

    :param key: a key string
    :return: a bucket (list)
    """
    index = shash(key) % self.bucket_count
    return self.buckets[index]
def test_shash(self):
    """
    Checks that shash spreads the lines of the word list evenly over a fixed number of buckets.

    Every line of '../words.txt' is hashed and counted into one of bucket_count buckets. The test fails if the
    population standard deviation of the bucket counts is bigger than the product of the tolerance and the mean
    of the bucket counts.
    """
    # How many buckets we will use.
    bucket_count = 100
    # The test fails if the population standard deviation of the buckets is bigger than the product of the
    # tolerance and the mean of the buckets.
    tolerance = 0.05
    buckets = [0] * bucket_count
    # Load a lot of words. The path is relative to the current working directory.
    with open('../words.txt', 'r') as words:
        # Iterate the file object directly so the whole file is not loaded into a list first.
        # Each line is hashed exactly as read, i.e. including its trailing newline.
        for line in words:
            buckets[shash(line) % bucket_count] += 1
    # assertLessEqual replaces the deprecated failIf alias, which was removed from unittest in Python 3.12,
    # and reports both values when the check fails.
    self.assertLessEqual(pstdev(buckets), tolerance * mean(buckets))
def find(text, substr):
    """
    Searches a string of text for a substring by comparing hashes of same-sized windows.

    The hash of each window of the text is compared with the hash of the substring; only when the hashes are
    equal are the actual characters compared, so a hash collision never produces a false match.

    :param text: the string of text
    :param substr: the substring
    :return: the index of the first character of the first appearance of the substring in the string or -1
    """
    text_len = len(text)
    window = len(substr)
    # A substring longer than the text can never occur in it.
    if window > text_len:
        return -1

    # Hash of the first window of the text, then the hash every window is compared against.
    window_hash = shash(text[:window])
    target_hash = shash(substr)
    if window_hash == target_hash and text[:window] == substr:
        return 0

    # Slide the window one character at a time over the remaining start positions.
    for start in range(1, text_len - window + 1):
        # shash_roll receives the previous window's hash, the window length, the character that just left the
        # window and the character that just entered it.
        window_hash = shash_roll(window_hash, window, text[start - 1], text[start + window - 1])
        if window_hash != target_hash:
            continue
        # Equal hashes may still be a collision, so confirm with a real comparison.
        if text[start:start + window] == substr:
            return start
    return -1