def _compute_vectorized(self, *data):
    """Run every positional input through ``_fillna``.

    Parameters
    ----------
    *data
        Zero or more inputs. Each one is handed to ``_fillna`` as-is,
        together with ``self.missing_value``.

    Returns
    -------
    tuple
        One ``_fillna`` result per input, in the order the inputs were
        given. The tuple is empty when no inputs are passed.
    """
    # ``*data`` is always bound as a tuple, so neither a type check nor a
    # separate single-value code path is required.
    return tuple(_fillna(col, self.missing_value) for col in data)
def _compute_vectorized(self, lat1, lng1, lat2, lng2):
    """Turn the distance between two coordinate sets into a similarity.

    The distance comes from ``_haversine_distance(lat1, lng1, lat2, lng2)``
    and is fed to the similarity function selected by ``self.method``.
    The outcome is finally passed through ``_fillna`` with
    ``self.missing_value``.

    Parameters
    ----------
    lat1, lng1, lat2, lng2
        Coordinates forwarded unchanged to ``_haversine_distance``
        (presumably latitudes/longitudes of both records -- confirm
        against the caller).

    Returns
    -------
    The result of ``_fillna`` applied to the computed similarity.

    Raises
    ------
    ValueError
        If ``self.method`` is not one of the recognised names.
    """
    distance = _haversine_distance(lat1, lng1, lat2, lng2)

    # Each entry: (accepted method names, similarity function,
    # whether the function takes ``self.scale``).
    algorithms = (
        (('step',), _step_sim, False),
        (('linear', 'lin'), _linear_sim, True),
        (('squared',), _squared_sim, True),
        (('exp', 'exponential'), _exp_sim, True),
        (('gauss', 'gaussian'), _gauss_sim, True),
    )

    for aliases, sim_func, takes_scale in algorithms:
        if self.method in aliases:
            break
    else:
        # No alias matched; note the distance has already been computed.
        raise ValueError("The algorithm '{}' is not known.".format(
            self.method))

    if takes_scale:
        similarity = sim_func(
            distance, self.scale, self.offset, self.origin)
    else:
        similarity = sim_func(distance, self.offset, self.origin)

    return _fillna(similarity, self.missing_value)
def _compute_vectorized(self, s_left, s_right):
    """Compute a string similarity between ``s_left`` and ``s_right``.

    The similarity function is chosen by ``self.method``. When
    ``self.threshold`` is not ``None`` the scores are binarised: scores
    greater than or equal to the threshold become ``1.0``, scores below
    it become ``0.0``, and null scores are left untouched. The outcome
    is finally passed through ``_fillna`` with ``self.missing_value``.

    Parameters
    ----------
    s_left, s_right
        Inputs forwarded unchanged to the selected similarity function.

    Returns
    -------
    The result of ``_fillna`` applied to the (optionally binarised)
    similarity scores.

    Raises
    ------
    ValueError
        If ``self.method`` is not one of the recognised names.
    """
    # Each entry: (accepted method names, similarity function).
    algorithms = (
        (('jaro',), jaro_similarity),
        (('jarowinkler', 'jaro_winkler', 'jw'), jarowinkler_similarity),
        (('levenshtein',), levenshtein_similarity),
        (('dameraulevenshtein', 'damerau_levenshtein', 'dl'),
         damerau_levenshtein_similarity),
        (('q_gram', 'qgram'), qgram_similarity),
        (('cosine',), cosine_similarity),
        (('smith_waterman', 'smithwaterman', 'sw'),
         smith_waterman_similarity),
        (('longest_common_substring', 'lcs'),
         longest_common_substring_similarity),
    )

    for aliases, sim_func in algorithms:
        if self.method in aliases:
            break
    else:
        raise ValueError("The algorithm '{}' is not known.".format(
            self.method))

    scores = sim_func(s_left, s_right)

    if self.threshold is not None:
        # First pass: keep scores below the threshold (and nulls),
        # replace everything else with 1.0.
        below_or_null = (scores < self.threshold) | (pandas.isnull(scores))
        scores = scores.where(below_or_null, other=1.0)
        # Second pass: keep scores at/above the threshold (and nulls),
        # replace everything else with 0.0.
        reached_or_null = (
            (scores >= self.threshold) | (pandas.isnull(scores)))
        scores = scores.where(reached_or_null, other=0.0)

    return _fillna(scores, self.missing_value)