def test_simple(self):
    """Transform one short sentence and check the synset names it yields.

    The result must contain exactly one item, that item must be a ``str``,
    and it must include ``light.a.01`` and ``crash.v.01`` while not
    including ``crash.n.02``.
    """
    X = ["The light crashes"]
    Z = MapToSynsets().transform(X)

    self.assertEqual(len(Z), 1)
    # assertIsInstance reports the actual type on failure, which
    # assertTrue(isinstance(...)) cannot do.
    self.assertIsInstance(Z[0], str)
    for expected in ("light.a.01", "crash.v.01"):
        self.assertIn(expected, Z[0])
    self.assertNotIn("crash.n.02", Z[0])
def build_synset_extraction(binary, min_df, ngram, useTfIdf):
    """Build a synset-based feature-extraction pipeline.

    The pipeline chains ``MapToSynsets``, a text vectorizer and
    ``ClassifierOvOAsFeatures`` via ``make_pipeline``.

    Args:
        binary: Forwarded as ``binary`` to ``CountVectorizer``; not used
            when ``useTfIdf`` is truthy.
        min_df: Forwarded as ``min_df`` to whichever vectorizer is built.
        ngram: Upper bound of the ``CountVectorizer`` n-gram range
            ``(1, ngram)``; not used when ``useTfIdf`` is truthy, where the
            range is fixed at ``(1, 3)``.
        useTfIdf: When truthy a ``TfidfVectorizer`` is used, otherwise a
            ``CountVectorizer``.

    Returns:
        The object returned by ``make_pipeline`` for the three steps.
    """
    synset_mapper = MapToSynsets()

    if useTfIdf:
        # NOTE(review): this branch passes no custom tokenizer and ignores
        # ``binary``/``ngram``, unlike the count branch -- confirm intended.
        vectorizer = TfidfVectorizer(
            min_df=min_df,
            max_df=0.8,
            sublinear_tf=True,
            use_idf=True,
            ngram_range=(1, 3),
        )
    else:
        # Split on whitespace only, instead of the vectorizer's default
        # tokenization.
        vectorizer = CountVectorizer(
            binary=binary,
            tokenizer=lambda text: text.split(),
            min_df=min_df,
            ngram_range=(1, ngram),
        )

    return make_pipeline(synset_mapper, vectorizer, ClassifierOvOAsFeatures())
def build_synset_extraction(binary, min_df, ngram):
    """Build a count-based synset feature-extraction pipeline.

    Args:
        binary: Forwarded as ``binary`` to ``CountVectorizer``.
        min_df: Forwarded as ``min_df`` to ``CountVectorizer``.
        ngram: Upper bound of the n-gram range ``(1, ngram)``.

    Returns:
        The object returned by ``make_pipeline`` for ``MapToSynsets``, the
        ``CountVectorizer`` and ``ClassifierOvOAsFeatures``, in that order.
    """
    synset_mapper = MapToSynsets()
    # Split on whitespace only, instead of the vectorizer's default
    # tokenization.
    count_vectorizer = CountVectorizer(
        binary=binary,
        tokenizer=lambda text: text.split(),
        min_df=min_df,
        ngram_range=(1, ngram),
    )
    ovo_features = ClassifierOvOAsFeatures()
    return make_pipeline(synset_mapper, count_vectorizer, ovo_features)
def test_fit_returns_self(self):
    """``fit`` must return the very instance it was called on."""
    mapper = MapToSynsets()
    fitted = mapper.fit([])
    # Identity rather than equality: "returns self" means the same object,
    # which assertEqual would not guarantee if ``__eq__`` were overridden.
    self.assertIs(fitted, mapper)
def test_empty(self):
    """Transforming an empty list gives a result of length zero."""
    transformed = MapToSynsets().transform([])
    self.assertEqual(len(transformed), 0)