def hamming(str1, str2):
    """
    Measure the similarity between two strings using Hamming distance, which
    simply gives the number of characters in the strings that are different,
    i.e. the number of substitution edits needed to change one string into
    the other.

    Args:
        str1 (str)
        str2 (str)

    Returns:
        float: similarity between `str1` and `str2` in the interval [0.0, 1.0],
        where larger values correspond to more similar strings. Two empty
        strings are treated as identical and give 1.0.

    Note:
        This uses a *modified* Hamming distance in that it permits strings
        of different lengths to be compared: the difference in lengths is
        added to the distance that `_hamming` reports for the shorter string
        and the equally long prefix of the longer one.
    """
    len_str1 = len(str1)
    len_str2 = len(str2)
    # nothing to compare and nothing to normalize by: without this guard the
    # division below would raise ZeroDivisionError for two empty inputs
    if len_str1 == 0 and len_str2 == 0:
        return 1.0
    if len_str1 == len_str2:
        distance = _hamming(str1, str2)
    else:
        # make sure str1 is as long as or longer than str2
        if len_str2 > len_str1:
            str1, str2 = str2, str1
            len_str1, len_str2 = len_str2, len_str1
        # distance is # of different chars + difference in str lengths
        distance = len_str1 - len_str2
        distance += _hamming(str1[:len_str2], str2)
    # normalize by the longer length (len_str1 after the swap above), then
    # flip the distance into a similarity
    distance /= len_str1
    return 1.0 - distance
def hamming(str1, str2, normalize=False):
    """
    Measure the distance between two strings using Hamming distance, which
    simply gives the number of characters in the strings that are different,
    i.e. the number of substitution edits needed to change one string into
    the other.

    Args:
        str1 (str)
        str2 (str)
        normalize (bool): if truthy, divide Hamming distance by the total
            number of characters in the longest string; otherwise leave the
            distance as-is

    Returns:
        int or float: if `normalize` is falsy, return the raw distance,
        otherwise return a float in the interval [0.0, 1.0], where smaller
        values correspond to more similar strings. Two empty strings have a
        normalized distance of 0.0.

    .. note:: This is a *modified* Hamming distance in that it permits strings
        of different lengths to be compared: the difference in lengths is
        added to the distance that `_hamming` reports for the shorter string
        and the equally long prefix of the longer one.
    """
    len_str1 = len(str1)
    len_str2 = len(str2)
    # two empty strings cannot be normalized by their length (it is zero);
    # without this guard the division below would raise ZeroDivisionError
    if normalize and len_str1 == 0 and len_str2 == 0:
        return 0.0
    if len_str1 == len_str2:
        distance = _hamming(str1, str2)
    else:
        # make sure str1 is as long as or longer than str2
        if len_str2 > len_str1:
            str1, str2 = str2, str1
            len_str1, len_str2 = len_str2, len_str1
        # distance is # of different chars + difference in str lengths
        distance = len_str1 - len_str2
        distance += _hamming(str1[:len_str2], str2)
    # truthiness rather than `is True`, so flags such as 1 are honored
    # instead of being silently ignored
    if normalize:
        distance /= len_str1
    return distance
def hamming(str1, str2):
    """
    Measure the similarity between two strings using Hamming distance, which
    simply gives the number of characters in the strings that are different,
    i.e. the number of substitution edits needed to change one string into
    the other.

    Args:
        str1 (str)
        str2 (str)

    Returns:
        float: similarity between `str1` and `str2` in the interval [0.0, 1.0],
        where larger values correspond to more similar strings. A pair of
        empty strings is considered a perfect match and yields 1.0.

    .. note:: This uses a *modified* Hamming distance in that it permits
        strings of different lengths to be compared: the surplus characters
        of the longer string are counted in full, on top of the distance
        `_hamming` reports for the overlapping part.
    """
    len_str1 = len(str1)
    len_str2 = len(str2)
    if len_str1 == 0 and len_str2 == 0:
        # the normalization below divides by the longer length; with two
        # empty inputs that is zero, so answer directly instead of raising
        # ZeroDivisionError
        return 1.0
    if len_str1 == len_str2:
        distance = _hamming(str1, str2)
    else:
        # make sure str1 is as long as or longer than str2
        if len_str2 > len_str1:
            str1, str2 = str2, str1
            len_str1, len_str2 = len_str2, len_str1
        # distance is # of different chars + difference in str lengths
        distance = len_str1 - len_str2
        distance += _hamming(str1[:len_str2], str2)
    # len_str1 is the longer (or common) length at this point
    distance /= len_str1
    return 1.0 - distance