def transform_one(self, obs, target, id):
    """Jaccard coefficients between ``target``'s n-grams and the
    ``self.target_field`` text of peer training rows.

    Peer rows share the same ``search_term`` as ``obs`` and have relevance
    equal to ``self.relevance``; the row with the same ``id`` is excluded so
    a sample never matches itself.

    Returns a list of Jaccard coefficients, or
    ``[config.MISSING_VALUE_NUMERIC]`` when no peer rows exist.
    """
    # Boolean-mask filtering always yields a (possibly empty) DataFrame,
    # so the former `is not None` guards were dead code; the frame is only
    # read, so the defensive .copy() calls were also unnecessary.
    df = self.dfTrain[self.dfTrain["search_term"] == obs]
    df = df[(df["id"] != id) & (df["relevance"] == self.relevance)]
    if df.shape[0] == 0:
        return [config.MISSING_VALUE_NUMERIC]
    # Hoist the target n-grams out of the per-row loop (invariant work).
    target_tokens = nlp_utils._tokenize(target, token_pattern)
    target_ngrams = ngram_utils._ngrams(target_tokens, self.ngram)
    return [
        dist_utils._jaccard_coef(
            ngram_utils._ngrams(
                nlp_utils._tokenize(x, token_pattern), self.ngram
            ),
            target_ngrams,
        )
        for x in df[self.target_field]
    ]
def transform_one(self, obs, target, id):
    """Jaccard coefficients between ``target``'s n-grams and the
    ``self.target_field`` text of peer training rows.

    Peer rows share the same ``search_term`` as ``obs`` and have relevance
    equal to ``self.relevance``; the row with the same ``id`` is excluded so
    a sample never matches itself.

    Returns a list of Jaccard coefficients, or
    ``[config.MISSING_VALUE_NUMERIC]`` when no peer rows exist.
    """
    # Boolean-mask filtering always yields a (possibly empty) DataFrame,
    # so the former `is not None` guards were dead code; the frame is only
    # read, so the defensive .copy() calls were also unnecessary.
    df = self.dfTrain[self.dfTrain["search_term"] == obs]
    df = df[(df["id"] != id) & (df["relevance"] == self.relevance)]
    if df.shape[0] == 0:
        return [config.MISSING_VALUE_NUMERIC]
    # Hoist the target n-grams out of the per-row loop (invariant work).
    target_tokens = nlp_utils._tokenize(target, token_pattern)
    target_ngrams = ngram_utils._ngrams(target_tokens, self.ngram)
    return [
        dist_utils._jaccard_coef(
            ngram_utils._ngrams(
                nlp_utils._tokenize(x, token_pattern), self.ngram
            ),
            target_ngrams,
        )
        for x in df[self.target_field]
    ]
def transform_one(self, obs, target, id):
    """Return the Jaccard coefficient between the ``self.ngram`` n-gram
    sets of ``obs`` and ``target`` (``id`` is unused here)."""
    def _to_ngrams(text):
        # Shared tokenize -> n-grams pipeline for both sides.
        tokens = nlp_utils._tokenize(text, token_pattern)
        return ngram_utils._ngrams(tokens, self.ngram)

    return dist_utils._jaccard_coef(_to_ngrams(obs), _to_ngrams(target))
def distance(self, obs_ngrams, target_ngrams):
    """Jaccard coefficient between two pre-computed n-gram collections;
    thin delegation to ``dist_utils._jaccard_coef``."""
    coef = dist_utils._jaccard_coef(obs_ngrams, target_ngrams)
    return coef