def extract(self, train_instance):
    """Vectorise the ``type='pos'`` word sequences of a sentence pair.

    Args:
        train_instance: object exposing ``get_word``; it is queried with
            ``type='pos'`` and ``stopwords=True`` and must yield a pair.

    Returns:
        The ``(features, infos)`` tuple obtained from
        ``utils.sentence_vectorize_features`` called with
        ``self.idf_weight`` and ``convey='count'``.
    """
    pos_pair = train_instance.get_word(type='pos', stopwords=True)
    first_pos, second_pos = pos_pair
    vector, details = utils.sentence_vectorize_features(
        first_pos,
        second_pos,
        self.idf_weight,
        convey='count',
    )
    return vector, details
def extract(self, train_instance):
    """Vectorise the lower-cased ``type='lemma'`` words of a sentence pair.

    Args:
        train_instance: object exposing ``get_word``; it is queried with
            ``type='lemma'``, ``stopwords=self.stopwords`` and ``lower=True``
            and must yield a pair.

    Returns:
        The ``(features, infos)`` tuple obtained from
        ``utils.sentence_vectorize_features`` called with ``self.idf_weight``.
    """
    lemma_pair = train_instance.get_word(
        type='lemma',
        stopwords=self.stopwords,
        lower=True,
    )
    first_lemmas, second_lemmas = lemma_pair
    vector, details = utils.sentence_vectorize_features(
        first_lemmas, second_lemmas, self.idf_weight
    )
    return vector, details
def extract(self, train_instance):
    """Vectorise a sentence pair's lemmas using the ``'global_idf'`` dictionary.

    Args:
        train_instance: object exposing ``get_word``; it is queried with
            ``type='lemma'``, ``stopwords=True`` and ``lower=True`` and must
            yield a pair.

    Returns:
        The ``(features, infos)`` tuple obtained from
        ``utils.sentence_vectorize_features``.
    """
    # NOTE(review): the dictionary is requested on every call; confirm that
    # DictLoader caches it, otherwise this is repeated work per instance.
    loader = dict_utils.DictLoader()
    global_idf = loader.load_dict('global_idf')

    lemma_pair = train_instance.get_word(type='lemma', stopwords=True, lower=True)
    first_lemmas, second_lemmas = lemma_pair

    vector, details = utils.sentence_vectorize_features(
        first_lemmas, second_lemmas, global_idf
    )
    return vector, details
def extract(self, train_instance):
    """Build match and vectorised features from a pair's dependency output.

    The feature list is the concatenation, in order, of the features from
    ``utils.sentence_match_features`` and those from
    ``utils.sentence_vectorize_features`` (called with ``self.idf_weight``
    and ``convey=self.convey``).

    Args:
        train_instance: object exposing ``get_dependency``, which must yield
            a pair.

    Returns:
        ``(features, infos)`` where ``infos`` is the two-element list holding
        both items returned by ``get_dependency``.
    """
    first_dep, second_dep = train_instance.get_dependency()
    combined = []

    # The helpers' own info values are not used; only their features are kept.
    match_part, _ = utils.sentence_match_features(first_dep, second_dep)
    combined.extend(match_part)

    vector_part, _ = utils.sentence_vectorize_features(
        first_dep,
        second_dep,
        self.idf_weight,
        convey=self.convey,
    )
    combined.extend(vector_part)

    return combined, [first_dep, second_dep]
def extract(self, train_instance):
    """Build sequence, match and vectorised features for a sentence pair.

    The feature list is the concatenation, in order, of the features from
    ``utils.sentence_sequence_features``, ``utils.sentence_match_features``
    and ``utils.sentence_vectorize_features`` (with ``convey='count'``).

    Args:
        train_instance: object exposing ``get_preprocess``, which must yield
            a pair.

    Returns:
        ``(features, infos)`` where ``infos`` is the two-element list holding
        both items returned by ``get_preprocess``.
    """
    sa, sb = train_instance.get_preprocess()
    features = []

    # The helpers' own info values are not used; only their features are kept.
    sequence_part, _ = utils.sentence_sequence_features(sa, sb)
    features.extend(sequence_part)

    match_part, _ = utils.sentence_match_features(sa, sb)
    features.extend(match_part)

    # The weights passed to the vectoriser are computed from this pair only,
    # not taken from an instance-level dictionary.
    bow = utils.idf_calculator([sa, sb])
    vector_part, _ = utils.sentence_vectorize_features(sa, sb, bow, convey='count')
    features.extend(vector_part)

    return features, [sa, sb]