Exemplo n.º 1
0
 def calc_onehash(shingle, seed):
     """Hash one shingle under one seed, reduced modulo ``self.modulo``.

     The shingle's base hash is XOR-ed with ``long(seed)`` and the result is
     taken modulo ``self.modulo``.  ``self`` is not a parameter of this
     function; it is read from the enclosing scope.

     :param shingle: shingle to hash; for the 'c4' shingle type it must be
         something ``struct.unpack('<i', ...)`` accepts (a 4-byte buffer).
     :param seed: value convertible by ``long``; mixed into the hash via XOR.
     :return: the seeded hash, reduced modulo ``self.modulo``.
     """
     def unsigned_c4(raw):
         # Interpret the bytes as one little-endian signed 32-bit integer.
         (signed_value,) = struct.unpack('<i', raw)
         # Wrap negatives into [0, 2 * (sys.maxsize + 1)), i.e. 2**64 on a
         # typical 64-bit build.
         word_span = (sys.maxsize + 1) * 2
         return signed_value % word_span

     if self.sh_type == 'c4':
         base_hash = unsigned_c4(shingle)
     else:
         # compute_positive_hash is defined outside this block.
         base_hash = compute_positive_hash(shingle)
     return (base_hash ^ long(seed)) % self.modulo
Exemplo n.º 2
0
        def calc_onehash(sh_type, shingle, seed, modulo):
            """Hash one shingle under one seed and reduce it modulo ``modulo``.

            :param sh_type: shingle type; ``'c4'`` selects the struct-based
                hash, anything else goes through ``compute_positive_hash``.
            :param shingle: shingle to hash; for ``'c4'`` it must be a 4-byte
                buffer accepted by ``struct.unpack('<i', ...)``.
            :param seed: value convertible by ``long``; XOR-ed into the hash.
            :param modulo: modulus applied to the XOR-ed value.
            :return: ``(base_hash ^ long(seed)) % modulo``.
            """
            def unsigned_c4(raw):
                # One little-endian signed 32-bit integer from the raw bytes.
                (signed_value,) = struct.unpack('<i', raw)
                # Wrap negatives into [0, 2 * (sys.maxsize + 1)).
                word_span = (sys.maxsize + 1) * 2
                return signed_value % word_span

            if sh_type == 'c4':
                base_hash = unsigned_c4(shingle)
            else:
                # compute_positive_hash is defined outside this block.
                base_hash = compute_positive_hash(shingle)
            return (base_hash ^ long(seed)) % modulo
    def _calculate_hash(self, obj):
        """
            Hash an object and fold the result into ``2 ** self.num_bands`` values.

            :param obj: object to hash; handed unchanged to ``compute_positive_hash``.
            :return: ``compute_positive_hash(obj)`` modulo ``2 ** self.num_bands``.
        """
        # compute_positive_hash is defined outside this block; going by its
        # name it presumably yields a non-negative integer -- TODO confirm.
        positive_hash = compute_positive_hash(obj)

        # Keep only the remainder modulo 2^b, where b is self.num_bands.
        bucket_count = 2 ** self.num_bands
        return positive_hash % bucket_count
    def _calculate_hash(self, obj):
        """
            Compute the hash code of ``obj`` reduced modulo ``2 ** self.num_bands``.

            :param obj: object whose hash code is wanted.
            :return: remainder of ``compute_positive_hash(obj)`` divided by
                ``2 ** self.num_bands``.
        """
        # The base hash comes from compute_positive_hash (defined elsewhere);
        # the exponent b in the 2^b modulus is self.num_bands.
        return compute_positive_hash(obj) % (2 ** self.num_bands)
 def calc_onehash(shingle, seed):
     """Hash one shingle under one seed, reduced modulo ``self.modulo``.

     ``self`` and ``max_mask`` are not parameters; both are read from the
     surrounding scope.

     :param shingle: shingle to hash; for the 'c4' shingle type it must be
         something ``struct.unpack('<i', ...)`` accepts (a 4-byte buffer).
     :param seed: value convertible by ``long``; mixed into the hash via XOR.
     :return: the seeded hash, reduced modulo ``self.modulo``.
     """
     def masked_c4(raw):
         # Read one little-endian signed 32-bit integer and keep only the
         # bits selected by max_mask.
         # (A shingle_cache-backed memoised variant used to be sketched here
         # as commented-out code; it is not active.)
         return struct.unpack('<i', raw)[0] & max_mask

     if self.sh_type == 'c4':
         base_hash = masked_c4(shingle)
     else:
         # compute_positive_hash is defined outside this block.
         base_hash = compute_positive_hash(shingle)
     return (base_hash ^ long(seed)) % self.modulo
            def calc_onehash(shingle, seed):
                """Hash one shingle under one seed, reduced modulo ``self.modulo``.

                ``self`` and ``settings`` are not parameters; both are read
                from the surrounding scope.

                :param shingle: shingle to hash.
                :param seed: value convertible by ``long``; XOR-ed into the hash.
                :return: the seeded hash, reduced modulo ``self.modulo``.
                """
                def masked_c4(raw):
                    try:
                        value = struct.unpack('<i', raw)[0]
                    except struct.error:
                        # Per the original note, this path is taken when the
                        # shingle contains non-ASCII characters.  The UTF-8
                        # bytes are cut into 4-byte windows, each window is
                        # read as a hexadecimal integer, and the integers are
                        # XOR-ed together.
                        # NOTE(review): windows start at consecutive byte
                        # offsets (0, 1, 2, ...), not multiples of 4, so they
                        # overlap -- confirm this is intended.
                        window = 4
                        utf8_bytes = raw.encode('utf-8')
                        window_count = len(utf8_bytes) / window
                        value = reduce(
                            operator.xor,
                            [int(utf8_bytes[start:start + window].encode('hex'), 16)
                             for start in range(window_count)])
                    # Keep only the bits selected by settings.max_mask.
                    return value & settings.max_mask

                if self.sh_type == 'c4':
                    base_hash = masked_c4(shingle)
                else:
                    # compute_positive_hash is defined outside this block.
                    base_hash = compute_positive_hash(shingle)
                return (base_hash ^ long(seed)) % self.modulo
            def calc_onehash(shingle, seed):
                """Return the seeded hash of ``shingle`` modulo ``self.modulo``.

                ``self`` and ``settings`` come from the surrounding scope, not
                from the argument list.

                :param shingle: shingle to hash.
                :param seed: value convertible by ``long``; XOR-ed into the hash.
                :return: ``(base_hash ^ long(seed)) % self.modulo``.
                """
                def fallback_hash(text):
                    # XOR together the integers read (as hex) from 4-byte
                    # windows of the UTF-8 encoding of the shingle.
                    # NOTE(review): window offsets advance one byte at a time
                    # rather than four, so windows overlap -- confirm intent.
                    width = 4
                    payload = text.encode('utf-8')
                    window_values = []
                    for offset in range(len(payload) / width):
                        hex_digits = payload[offset:offset + width].encode('hex')
                        window_values.append(int(hex_digits, 16))
                    return reduce(operator.xor, window_values)

                def masked_c4(raw):
                    try:
                        value = struct.unpack('<i', raw)[0]
                    except struct.error:
                        # Per the original note: reached when the shingle has
                        # non-ASCII characters in it.
                        value = fallback_hash(raw)
                    # Keep only the bits selected by settings.max_mask.
                    return value & settings.max_mask

                if self.sh_type == 'c4':
                    base_hash = masked_c4(shingle)
                else:
                    # compute_positive_hash is defined outside this block.
                    base_hash = compute_positive_hash(shingle)
                return (base_hash ^ long(seed)) % self.modulo
Exemplo n.º 8
0
def calc_hash(value):
    """Return ``compute_positive_hash(value)``.

    Thin wrapper: the argument is forwarded unchanged and the helper's
    result is returned as-is.  ``compute_positive_hash`` is defined
    outside this block.
    """
    hashed = compute_positive_hash(value)
    return hashed