Пример #1
0
 def test_empty_blocks(self, simil_df):
     """Check predictions when a document's only block is an empty string.

     The second row of a copy of ``simil_df`` gets ``['']`` as its blocks.
     A selector fitted on that copy is expected to predict ``''`` for that
     row, and also for a stand-alone ``[['']]`` input.
     """
     X = simil_df.copy()
     # Write the cell through the frame itself. The chained form
     # ``X['blocks'].iloc[1] = ...`` may assign to a temporary Series and
     # leave ``X`` untouched (always the case under pandas Copy-on-Write).
     # NOTE(review): assumes the fixture's index labels are unique.
     X.at[X.index[1], 'blocks'] = ['']
     # Fit once and reuse the same model for every check below.
     model = SimilaritySelector().fit(X['blocks'], X['Ingrédients'])
     assert model.predict(X['blocks'])[1] == ''
     # Smoke check: the result is not inspected, the call must just succeed.
     model.predict([X['blocks'].iloc[0]])
     assert (model.predict([['']]) == np.array([''])).all()
Пример #2
0
 def test_hashing_type(self, simil_df):
     """Fit and predict with ``count_vect_type='HashingVectorizer'``.

     The predictions on the blocks used for fitting, wrapped in a Series,
     must equal the expected ingredient strings indexed like ``simil_df``.
     """
     selector = SimilaritySelector(count_vect_type='HashingVectorizer')
     selector.fit(simil_df['blocks'], simil_df['Ingrédients'])
     predicted = pd.Series(selector.predict(simil_df['blocks']))
     expected = pd.Series(
         ['100% sucre', 'E110, farine', 'haricots'],
         simil_df.index,
     )
     assert predicted.equals(expected)
Пример #3
0
 def test_predict_cosine(self, simil_df):
     """Fit and predict with ``similarity='cosine'``.

     The predictions on the blocks used for fitting, wrapped in a Series,
     must equal the expected ingredient strings indexed like ``simil_df``.
     """
     selector = SimilaritySelector(similarity='cosine')
     selector.fit(simil_df['blocks'], simil_df['Ingrédients'])
     predicted = pd.Series(selector.predict(simil_df['blocks']))
     expected = pd.Series(
         ['100% sucre', 'E110, farine', 'haricots'],
         simil_df.index,
     )
     assert predicted.equals(expected)
Пример #4
0
 def test_predict(self, simil_df):
     """Predict on one unseen document made of three candidate blocks.

     A default selector fitted on ``simil_df`` is expected to return
     ``'sucre, eau et betteraves'`` for that document.
     """
     fitted = SimilaritySelector().fit(
         simil_df['blocks'],
         simil_df['Ingrédients'],
     )
     candidate_blocks = [[
         'fabriqué en Italie',
         'mélange de nougat',
         'sucre, eau et betteraves',
     ]]
     expected = pd.Series(['sucre, eau et betteraves'])
     matches = fitted.predict(candidate_blocks) == expected
     assert all(matches)
Пример #5
0
 def test_embedding(self, simil_df):
     """Exercise the ``embedding_method`` option of the selector.

     Three cases are covered, in order:

     * ``'incorrect'`` must make ``fit`` raise ``ValueError``;
     * ``'Word2Vec'`` (with ``'de'`` passed as stop word) must predict the
       expected ingredient strings;
     * ``'tSVD'`` with ``n_components=5`` must predict the same strings.
     """
     # Unknown embedding method: fitting has to fail.
     with pytest.raises(ValueError):
         invalid = SimilaritySelector(embedding_method='incorrect')
         invalid.fit(simil_df['blocks'], simil_df['Ingrédients'])

     # Word2Vec embedding.
     w2v_selector = SimilaritySelector(
         embedding_method='Word2Vec',
         count_vect_kwargs={'stop_words': {'de'}},
     )
     w2v_selector.fit(simil_df['blocks'], simil_df['Ingrédients'])
     w2v_predictions = w2v_selector.predict(simil_df['blocks'])
     expected = pd.Series(
         ['100% sucre', 'E110, farine', 'haricots'],
         simil_df.index,
     )
     assert pd.Series(w2v_predictions).equals(expected)

     # Truncated SVD embedding, checked against the same expected Series.
     svd_selector = SimilaritySelector(
         embedding_method='tSVD',
         embedding_parms={'n_components': 5},
     )
     svd_selector.fit(simil_df['blocks'], simil_df['Ingrédients'])
     svd_predictions = svd_selector.predict(simil_df['blocks'])
     assert pd.Series(svd_predictions).equals(expected)
Пример #6
0
 def test_cosine_with_score(self, simil_df):
     """Check cosine similarity with both scoring options.

     For ``scoring='absolute_score'`` and then ``scoring='relative_score'``
     a fresh selector is fitted on ``simil_df`` and its predictions on the
     same blocks must equal the expected ingredient strings.
     """
     # Absolute scoring first, then relative scoring.
     for scoring_mode in ('absolute_score', 'relative_score'):
         selector = SimilaritySelector(
             similarity='cosine',
             scoring=scoring_mode,
         )
         selector.fit(simil_df['blocks'], simil_df['Ingrédients'])
         predictions = selector.predict(simil_df['blocks'])
         expected = pd.Series(
             ['100% sucre', 'E110, farine', 'haricots'],
             simil_df.index,
         )
         assert pd.Series(predictions).equals(expected)
Пример #7
0
 def test_l_norm_values(self, simil_df):
     """Check projection similarity with explicitly chosen norms.

     The source norm is given as the string ``'l3'`` and the projected
     norm as a callable (``sparse_norm`` bound to ``axis=1, ord=2``). The
     predictions on ``simil_df`` must equal the expected ingredient
     strings.
     """
     row_l2_norm = partial(sparse_norm, axis=1, ord=2)
     selector_options = {
         'similarity': 'projection',
         'source_norm': 'l3',
         'projected_norm': row_l2_norm,
     }
     selector = SimilaritySelector(**selector_options)
     selector.fit(simil_df['blocks'], simil_df['Ingrédients'])
     predicted = pd.Series(selector.predict(simil_df['blocks']))
     expected = pd.Series(
         ['100% sucre', 'E110, farine', 'haricots'],
         simil_df.index,
     )
     assert predicted.equals(expected)
Пример #8
0
 def test_predict_no_transform(self, simil_df):
     """Predict on one unseen two-block document with a default selector.

     The selector fitted on ``simil_df`` is expected to return
     ``'haricot'`` for the document ``['haricot', 'exploité en Inde']``.
     """
     fitted = SimilaritySelector().fit(
         simil_df['blocks'],
         simil_df['Ingrédients'],
     )
     document = ['haricot', 'exploité en Inde']
     matches = fitted.predict([document]) == pd.Series(['haricot'])
     assert all(matches)