def hamming_sim(s1, s2):
    """
    This function computes the Hamming similarity between the two input
    strings.

    Args:
        s1,s2 (string): The input strings for which the similarity measure
            should be computed. Values that are neither text nor bytes are
            converted with ``str()`` before scoring.

    Returns:
        The Hamming similarity if both the strings are not missing (i.e NaN),
        else returns NaN.

    Note:
        NOTE(review): py_stringmatching documents that ``HammingDistance``
        raises ``ValueError`` for inputs of different lengths; that error is
        not caught here - confirm callers expect it.
    """
    # ``pd.np`` is no longer available in pandas 2.x (and the ``NaN`` alias
    # was dropped in NumPy 2.0), so build the missing-value marker from the
    # builtin float instead. It is the same Python ``float`` type as before.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Text and bytes are passed through untouched; anything else is
    # stringified first.
    passthrough_types = six.string_types + (bytes,)
    if not isinstance(s1, passthrough_types):
        s1 = str(s1)
    if not isinstance(s2, passthrough_types):
        s2 = str(s2)

    # Create the similarity measure object and compute the score.
    measure = sm.HammingDistance()
    return measure.get_sim_score(s1, s2)
def hamming_sim(s1, s2):
    """
    This function computes the Hamming similarity between the two input
    strings.

    Args:
        s1,s2 (string): The input strings for which the similarity measure
            should be computed.

    Returns:
        The Hamming similarity if both the strings are not missing (i.e NaN),
        else returns NaN.

    Examples:
        >>> import py_entitymatching as em
        >>> em.hamming_sim('alex', 'alxe')
        0.5
        >>> em.hamming_sim(None, 'alex')
        nan

    Note:
        NOTE(review): py_stringmatching documents that ``HammingDistance``
        raises ``ValueError`` for inputs of different lengths; that error is
        not caught here - confirm callers expect it.
    """
    # ``pd.np`` is no longer available in pandas 2.x (and the ``NaN`` alias
    # was dropped in NumPy 2.0), so build the missing-value marker from the
    # builtin float instead. It is the same Python ``float`` type as before.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Normalise both inputs through the shared helper before scoring.
    s1 = gh.convert_to_str_unicode(s1)
    s2 = gh.convert_to_str_unicode(s2)

    # Create the similarity measure object and compute the score.
    measure = sm.HammingDistance()
    return measure.get_sim_score(s1, s2)
def hamming_score(self, str_pair, sim_score=True):
    """
    Calculate the Hamming similarity (or raw score) between two strings.

    Args:
        str_pair: Handed to ``self._check_input``, whose result is unpacked
            into the two strings to compare.
        sim_score (bool): When true, return the similarity score; otherwise
            return the raw score.

    Returns:
        ``0`` when the two strings differ in length; otherwise the value of
        ``HammingDistance.get_sim_score`` or ``get_raw_score``, depending on
        ``sim_score``.
    """
    first, second = self._check_input(str_pair)

    # Strings of unequal length short-circuit to 0 for both score kinds.
    # NOTE(review): for the raw score a 0 may read as "no differences" -
    # confirm this is the intended sentinel.
    if len(first) != len(second):
        return 0

    measure = sm.HammingDistance()
    if sim_score:
        return measure.get_sim_score(first, second)
    return measure.get_raw_score(first, second)
def hamming_sim(s1, s2):
    """
    Compute the Hamming similarity between the two input values.

    Args:
        s1,s2 (string): The inputs to compare. Both are passed through
            ``helper.convert_to_str_unicode`` before scoring.

    Returns:
        The score from ``HammingDistance.get_sim_score`` if neither input is
        missing (``None`` or null according to ``pd.isnull``), else NaN.

    Note:
        NOTE(review): py_stringmatching documents that ``HammingDistance``
        raises ``ValueError`` for inputs of different lengths; that error is
        not caught here - confirm callers expect it.
    """
    # ``pd.np`` is no longer available in pandas 2.x (and the ``NaN`` alias
    # was dropped in NumPy 2.0), so build the missing-value marker from the
    # builtin float instead. It is the same Python ``float`` type as before.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.HammingDistance()
    return measure.get_sim_score(s1, s2)
def hamming_sim(s1, s2):
    """
    Compute the Hamming similarity between the two input values.

    Args:
        s1,s2 (string): The inputs to compare. Values that are neither text
            nor bytes are converted with ``str()`` before scoring.

    Returns:
        The score from ``HammingDistance.get_sim_score`` if neither input is
        missing (``None`` or null according to ``pd.isnull``), else NaN.

    Note:
        NOTE(review): py_stringmatching documents that ``HammingDistance``
        raises ``ValueError`` for inputs of different lengths; that error is
        not caught here - confirm callers expect it.
    """
    # ``pd.np`` is no longer available in pandas 2.x (and the ``NaN`` alias
    # was dropped in NumPy 2.0), so build the missing-value marker from the
    # builtin float instead. It is the same Python ``float`` type as before.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Text and bytes are passed through untouched; anything else is
    # stringified first.
    passthrough_types = six.string_types + (bytes,)
    if not isinstance(s1, passthrough_types):
        s1 = str(s1)
    if not isinstance(s2, passthrough_types):
        s2 = str(s2)

    # Create the similarity measure object and compute the score.
    measure = sm.HammingDistance()
    return measure.get_sim_score(s1, s2)
def hamming_dist(s1, s2):
    """
    This function computes the Hamming distance between the two input
    strings.

    Args:
        s1,s2 (string): The input strings for which the distance should be
            computed. Values that are neither text nor bytes are converted
            with ``str()`` before scoring.

    Returns:
        The Hamming distance if both the strings are not missing (i.e NaN),
        else returns NaN.

    Examples:
        >>> import py_entitymatching as em
        >>> em.hamming_dist('alex', 'john')
        4
        >>> em.hamming_dist(None, 'john')
        nan

    Note:
        NOTE(review): py_stringmatching documents that ``HammingDistance``
        raises ``ValueError`` for inputs of different lengths; that error is
        not caught here - confirm callers expect it.
    """
    # ``pd.np`` is no longer available in pandas 2.x (and the ``NaN`` alias
    # was dropped in NumPy 2.0), so build the missing-value marker from the
    # builtin float instead. It is the same Python ``float`` type as before.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Text and bytes are passed through untouched; anything else is
    # stringified first.
    passthrough_types = six.string_types + (bytes,)
    if not isinstance(s1, passthrough_types):
        s1 = str(s1)
    if not isinstance(s2, passthrough_types):
        s2 = str(s2)

    # Create the measure object and compute the distance.
    measure = sm.HammingDistance()
    return measure.get_raw_score(s1, s2)