Exemplo n.º 1
0
def hamming(str1, str2):
    """
    Measure the similarity between two strings using Hamming distance, which
    simply gives the number of characters in the strings that are different i.e.
    the number of substitution edits needed to change one string into the other.

    Args:
        str1 (str)
        str2 (str)

    Returns:
        float: similarity between `str1` and `str2` in the interval [0.0, 1.0],
        where larger values correspond to more similar strings. Two empty
        strings are treated as identical, i.e. their similarity is 1.0.

    Note:
        This uses a *modified* Hamming distance in that it permits strings
        of different lengths to be compared: the length difference is added
        to the number of mismatches found over the shorter string's length.
    """
    len_str1 = len(str1)
    len_str2 = len(str2)
    # two empty strings are trivially identical; returning early also avoids
    # dividing by zero when normalizing by the longer string's length below
    if len_str1 == 0 and len_str2 == 0:
        return 1.0
    # make sure str1 is as long as or longer than str2
    if len_str2 > len_str1:
        str1, str2 = str2, str1
        len_str1, len_str2 = len_str2, len_str1
    # distance is # of different chars + difference in str lengths; when the
    # lengths are equal the difference is 0 and the slice is the whole string
    distance = (len_str1 - len_str2) + _hamming(str1[:len_str2], str2)
    # normalize by the longer length, then flip distance into similarity
    return 1.0 - distance / len_str1
Exemplo n.º 2
0
def hamming(str1, str2, normalize=False):
    """
    Measure the distance between two strings using Hamming distance, which simply
    gives the number of characters in the strings that are different, i.e. the
    number of substitution edits needed to change one string into the other.

    Args:
        str1 (str)
        str2 (str)
        normalize (bool): if True, divide Hamming distance by the total number of
            characters in the longest string; otherwise leave the distance as-is

    Returns:
        int or float: if `normalize` is False, return an int, otherwise return
            a float in the interval [0.0, 1.0], where smaller values correspond
            to more similar strings. Two empty strings have a distance of 0
            (0.0 if `normalize` is True).

    .. note:: This is a *modified* Hamming distance in that it permits strings of
        different lengths to be compared: the length difference is added to the
        number of mismatches found over the shorter string's length.
    """
    len_str1 = len(str1)
    len_str2 = len(str2)
    # two empty strings have nothing that differs; returning early also avoids
    # dividing by zero when normalizing by the longer string's length below
    if len_str1 == 0 and len_str2 == 0:
        return 0.0 if normalize else 0
    # make sure str1 is as long as or longer than str2
    if len_str2 > len_str1:
        str1, str2 = str2, str1
        len_str1, len_str2 = len_str2, len_str1
    # distance is # of different chars + difference in str lengths; when the
    # lengths are equal the difference is 0 and the slice is the whole string
    distance = (len_str1 - len_str2) + _hamming(str1[:len_str2], str2)
    # truthiness check so that truthy flags other than the `True` singleton
    # (e.g. numpy.bool_) are honored as well
    if normalize:
        distance /= len_str1
    return distance
Exemplo n.º 3
0
def hamming(str1, str2, normalize=False):
    """
    Measure the distance between two strings using Hamming distance, which simply
    gives the number of characters in the strings that are different, i.e. the
    number of substitution edits needed to change one string into the other.

    Args:
        str1 (str)
        str2 (str)
        normalize (bool): if True, divide Hamming distance by the total number of
            characters in the longest string; otherwise leave the distance as-is

    Returns:
        int or float: if `normalize` is False, return an int, otherwise return
            a float in the interval [0.0, 1.0], where smaller values correspond
            to more similar strings. Two empty strings have a distance of 0
            (0.0 if `normalize` is True).

    .. note:: This is a *modified* Hamming distance in that it permits strings of
        different lengths to be compared: the length difference is added to the
        number of mismatches found over the shorter string's length.
    """
    len_str1 = len(str1)
    len_str2 = len(str2)
    # nothing differs between two empty strings; bail out here so the
    # normalization step below never divides by a zero length
    if len_str1 == 0 and len_str2 == 0:
        return 0.0 if normalize else 0
    # make sure str1 is as long as or longer than str2
    if len_str2 > len_str1:
        str1, str2 = str2, str1
        len_str1, len_str2 = len_str2, len_str1
    # distance is # of different chars + difference in str lengths; for
    # equal-length inputs the difference is 0 and the slice is the full string
    distance = (len_str1 - len_str2) + _hamming(str1[:len_str2], str2)
    # accept any truthy flag rather than only the `True` singleton
    # (e.g. numpy.bool_ values)
    if normalize:
        distance /= len_str1
    return distance
Exemplo n.º 4
0
def hamming(str1, str2):
    """
    Measure the similarity between two strings using Hamming distance, which
    simply gives the number of characters in the strings that are different i.e.
    the number of substitution edits needed to change one string into the other.

    Args:
        str1 (str)
        str2 (str)

    Returns:
        float: similarity between `str1` and `str2` in the interval [0.0, 1.0],
            where larger values correspond to more similar strings. Two empty
            strings are treated as identical, i.e. their similarity is 1.0.

    .. note:: This uses a *modified* Hamming distance in that it permits strings
        of different lengths to be compared: the length difference is added to
        the number of mismatches found over the shorter string's length.
    """
    len_str1 = len(str1)
    len_str2 = len(str2)
    # two empty strings are trivially identical; bail out here so the
    # normalization below never divides by a zero length
    if len_str1 == 0 and len_str2 == 0:
        return 1.0
    # make sure str1 is as long as or longer than str2
    if len_str2 > len_str1:
        str1, str2 = str2, str1
        len_str1, len_str2 = len_str2, len_str1
    # distance is # of different chars + difference in str lengths; for
    # equal-length inputs the difference is 0 and the slice is the full string
    distance = (len_str1 - len_str2) + _hamming(str1[:len_str2], str2)
    # normalize by the longer length, then convert distance to similarity
    return 1.0 - distance / len_str1