def transform_one(self, obs, target, id):
    """Compute cosine similarities between every obs/target token pair.

    Both texts are split with ``nlp_utils._tokenize`` using the module-level
    ``token_pattern``. Each obs token yields one inner list holding
    ``dist_utils._cosine_sim`` of its ``self.model`` entry against the
    ``self.model`` entry of every target token present in ``self.model``,
    in target-token order.

    NOTE(review): ``self.model`` is only required to support ``in`` and
    indexing by token here; presumably it is a word-embedding lookup with
    side-effect-free access -- confirm against the class constructor.

    Args:
        obs: Text whose tokens define the rows of the result.
        target: Text whose tokens are compared against each obs token.
        id: Not used by this method; kept so the signature stays compatible
            with existing callers.

    Returns:
        A list with one inner list per obs token. An inner list is
        ``[config.MISSING_VALUE_NUMERIC]`` when the obs token is not in
        ``self.model`` or when no target token is. When ``obs`` yields no
        tokens at all, the result is ``[[config.MISSING_VALUE_NUMERIC]]``.
    """
    obs_tokens = nlp_utils._tokenize(obs, token_pattern)
    target_tokens = nlp_utils._tokenize(target, token_pattern)

    # Which target tokens are usable does not depend on the obs token, so
    # resolve their vectors once instead of repeating the membership test
    # and lookup for every obs token.
    target_vectors = [
        self.model[token] for token in target_tokens if token in self.model
    ]

    val_list = []
    for obs_token in obs_tokens:
        sims = []
        if target_vectors and obs_token in self.model:
            # Look the obs vector up once per row rather than once per pair.
            obs_vector = self.model[obs_token]
            sims = [
                dist_utils._cosine_sim(obs_vector, target_vector)
                for target_vector in target_vectors
            ]
        # A row with no similarities is replaced by the missing-value
        # placeholder so every obs token contributes a non-empty row.
        val_list.append(sims or [config.MISSING_VALUE_NUMERIC])

    # Same placeholder, one level up, when there were no obs tokens.
    return val_list or [[config.MISSING_VALUE_NUMERIC]]
def _get_cosine_sim(self, sent1, sent2):
    """Return the cosine similarity between the vectors of two inputs.

    Each argument is converted with ``self._get_vector`` (``sent1`` first,
    then ``sent2``) and the two results are passed, in that order, to
    ``dist_utils._cosine_sim``.

    Args:
        sent1: First input handed to ``self._get_vector``.
        sent2: Second input handed to ``self._get_vector``.

    Returns:
        Whatever ``dist_utils._cosine_sim`` returns for the two vectors.
    """
    first_vector, second_vector = (
        self._get_vector(sent1),
        self._get_vector(sent2),
    )
    return dist_utils._cosine_sim(first_vector, second_vector)