Exemplo n.º 1
0
def test_ivis_model_saving(model_filepath):
    """Persisting an unsupervised Ivis model and reloading it must
    reproduce embeddings, serialisable state and every network weight."""
    original = Ivis(k=15, batch_size=16, n_epochs_without_progress=5)
    iris = datasets.load_iris()
    X = iris.data

    original.fit(X)
    original.save_model(model_filepath)

    restored = Ivis()
    restored.load_model(model_filepath)

    # Embeddings must be bit-identical after a save/load round trip.
    assert np.all(original.transform(X) == restored.transform(X))
    # Serialisable state dicts must be equal as well.
    assert original.__getstate__() == restored.__getstate__()

    # Every encoder layer must carry identical weights.
    for layer_a, layer_b in zip(original.encoder.layers,
                                restored.encoder.layers):
        for w_a, w_b in zip(layer_a.get_weights(), layer_b.get_weights()):
            assert np.all(w_a == w_b)

    # Optimizer state must survive the round trip too.
    for opt_a, opt_b in zip(original.model_.optimizer.get_weights(),
                            restored.model_.optimizer.get_weights()):
        assert np.all(opt_a == opt_b)

    # Saving onto an existing folder without overwrite must fail.
    with pytest.raises(FileExistsError) as exception_info:
        original.save_model(model_filepath)
    assert isinstance(exception_info.value, FileExistsError)
Exemplo n.º 2
0
def _custom_model_saving(model_filepath, save_fn, load_fn):
    """Round-trip an Ivis instance built on a user-supplied Keras
    backbone through ``save_fn``/``load_fn`` and verify equality."""
    iris = datasets.load_iris()
    X, Y = iris.data, iris.target

    # Small custom backbone: a single dense layer on the raw features.
    inp = tf.keras.layers.Input(shape=(X.shape[-1], ))
    hidden = tf.keras.layers.Dense(8, activation='relu')(inp)
    backbone = tf.keras.Model(inp, hidden)

    model = Ivis(k=15, batch_size=16, epochs=2, model=backbone)
    model.fit(X, Y)

    save_fn(model, model_filepath)
    reloaded = load_fn(model_filepath)

    # Embeddings and supervised scores must match exactly.
    assert np.all(model.transform(X) == reloaded.transform(X))
    assert np.all(model.score_samples(X) == reloaded.score_samples(X))

    _validate_network_equality(model, reloaded)

    # The reloaded model must still support further training.
    reloaded.fit_transform(X, Y)
Exemplo n.º 3
0
class DFIvis(BaseEstimator, TransformerMixin):
    """DataFrame-friendly wrapper around :class:`Ivis`.

    Fits Ivis on a subset of columns and returns the embedding as a new
    DataFrame whose columns are named ``{prefix}0 .. {prefix}n``.

    NOTE:
    - DFIvis(embedding_dims=df.shape[1]) to keep every dimension.
    """
    def __init__(self, columns=None, prefix='ivis_', **kwargs):
        self.columns = columns            # columns to embed; None = all
        self.prefix = prefix              # prefix for output column names
        self.model = Ivis(**kwargs)       # underlying Ivis estimator
        self.transform_cols = None        # resolved at fit time

    def fit(self, X, y=None):
        # Default to every column when none were requested explicitly.
        if self.columns is None:
            self.columns = X.columns
        # Preserve the frame's own column ordering.
        self.transform_cols = [c for c in X.columns if c in self.columns]
        targets = y.values if y is not None else y
        self.model.fit(X[self.transform_cols].values, targets)
        return self

    def transform(self, X):
        if self.transform_cols is None:
            raise NotFittedError(
                f"This {self.__class__.__name__} instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator."
            )

        embedded = self.model.transform(X[self.transform_cols].values)
        names = [f'{self.prefix}{i}' for i in range(self.model.embedding_dims)]
        return pd.DataFrame(embedded, columns=names)

    def fit_transform(self, X, y=None):
        return self.fit(X, y).transform(X)
Exemplo n.º 4
0
def test_supervised_model_saving(model_filepath):
    """Saving and reloading a supervised Ivis model must preserve
    embeddings, predictions, serialisable state and all weights, and
    ``save_model`` must refuse to clobber an existing folder unless
    ``overwrite=True`` is passed."""
    model = Ivis(k=15,
                 batch_size=16,
                 epochs=5,
                 supervision_metric='sparse_categorical_crossentropy')
    iris = datasets.load_iris()
    X = iris.data
    Y = iris.target

    model.fit(X, Y)
    model.save_model(model_filepath, overwrite=True)

    model_2 = Ivis()
    model_2.load_model(model_filepath)

    # Check that model embeddings are same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Check that model supervised predictions are same
    assert np.all(model.score_samples(X) == model_2.score_samples(X))
    # Serializable dicts are the same
    assert model.__getstate__() == model_2.__getstate__()

    # Check all weights are the same
    for model_layer, model_2_layer in zip(model.encoder.layers,
                                          model_2.encoder.layers):
        model_layer_weights = model_layer.get_weights()
        model_2_layer_weights = model_2_layer.get_weights()
        for i in range(len(model_layer_weights)):
            assert np.all(model_layer_weights[i] == model_2_layer_weights[i])

    # Check optimizer weights are the same
    for w1, w2 in zip(model.model_.optimizer.get_weights(),
                      model_2.model_.optimizer.get_weights()):
        assert np.all(w1 == w2)

    # Check that trying to save over an existing folder raises an Exception.
    # BUG FIX: the isinstance assertion used to sit *inside* the ``with``
    # block after the raising call, so it never executed; it belongs
    # outside the context manager (pytest.raises only populates
    # ``exception_info.value`` after the block exits).
    with pytest.raises(FileExistsError) as exception_info:
        model.save_model(model_filepath)
    assert isinstance(exception_info.value, FileExistsError)

    # Check that can overwrite existing model if requested
    model.save_model(model_filepath, overwrite=True)

    # Train new model
    y_pred_2 = model_2.fit_transform(X, Y)
Exemplo n.º 5
0
def test_custom_model_saving(model_filepath):
    """A supervised Ivis built on a custom Keras backbone must survive a
    save/load round trip with identical outputs, state and weights."""
    iris = datasets.load_iris()
    X, Y = iris.data, iris.target

    # Custom backbone: one wide dense layer over the raw features.
    inp = tf.keras.layers.Input(shape=(X.shape[-1], ))
    hidden = tf.keras.layers.Dense(128, activation='relu')(inp)
    backbone = tf.keras.Model(inp, hidden)

    model = Ivis(k=15,
                 batch_size=16,
                 epochs=5,
                 supervision_metric='sparse_categorical_crossentropy',
                 model=backbone)
    model.fit(X, Y)
    model.save_model(model_filepath, overwrite=True)

    reloaded = Ivis()
    reloaded.load_model(model_filepath)

    # Embeddings, supervised scores and serialisable state must match.
    assert np.all(model.transform(X) == reloaded.transform(X))
    assert np.all(model.score_samples(X) == reloaded.score_samples(X))
    assert model.__getstate__() == reloaded.__getstate__()

    # Layer weights must be identical, element by element.
    for layer_a, layer_b in zip(model.encoder.layers,
                                reloaded.encoder.layers):
        for w_a, w_b in zip(layer_a.get_weights(), layer_b.get_weights()):
            assert np.all(w_a == w_b)

    # Optimizer state must match as well.
    for opt_a, opt_b in zip(model.model_.optimizer.get_weights(),
                            reloaded.model_.optimizer.get_weights()):
        assert np.all(opt_a == opt_b)

    # The reloaded model must remain trainable.
    reloaded.fit_transform(X, Y)
Exemplo n.º 6
0
class Ivis(Transformer):
    """
    This transformer scales all the vectors in an [EmbeddingSet][whatlies.embeddingset.EmbeddingSet]
    down to ``n_components`` dimensions with the Ivis algorithm, using the
    implementation found [here](https://github.com/beringresearch/ivis).

    Important:
        This language backend might require you to manually install extra dependencies
        unless you installed via either;

        ```
        pip install whatlies[ivis]
        pip install whatlies[all]
        ```

    Arguments:
        n_components: the number of components to create/add
        kwargs: keyword arguments passed to the [Ivis implementation](https://bering-ivis.readthedocs.io/en/latest/hyperparameters.html)

    Usage:

    ```python
    from whatlies.language import GensimLanguage
    from whatlies.transformers import Ivis

    words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
             "cousin", "neice", "king", "queen", "dude", "guy", "gal", "fire",
             "dog", "cat", "mouse", "red", "bluee", "green", "yellow", "water",
             "person", "family", "brother", "sister"]

    lang = GensimLanguage("wordvectors.kv")
    emb = lang[words]
    emb.transform(Ivis(3)).plot_interactive_matrix('ivis_0', 'ivis_1', 'ivis_2')
    ```
    """
    def __init__(self, n_components=2, **kwargs):
        super().__init__()
        self.n_components = n_components
        self.kwargs = kwargs
        # Silence Ivis' own progress output; it is noisy inside pipelines.
        self.kwargs["verbose"] = 0
        self.tfm = IVIS(embedding_dims=self.n_components, **self.kwargs)

    def fit(self, embset):
        """Fit the underlying Ivis model on the embedding matrix."""
        _, X = embset.to_names_X()
        self.tfm.fit(X)
        self.is_fitted = True
        return self

    def transform(self, embset):
        """Project the embeddings and return a new, reduced EmbeddingSet."""
        names, X = embset.to_names_X()
        reduced = self.tfm.transform(X)
        axis_names = [f"ivis_{i}" for i in range(self.n_components)]
        # Append one unit vector per new axis so the axes themselves
        # become embeddings that downstream plots can reference by name.
        vectors = np.concatenate([reduced, np.eye(self.n_components)])
        mapping = new_embedding_dict(names + axis_names, vectors, embset)
        return EmbeddingSet(mapping,
                            name=f"{embset.name}.ivis_{self.n_components}()")
Exemplo n.º 7
0
def _unsupervised_model_save_test(model_filepath, save_fn, load_fn):
    """Round-trip an unsupervised Ivis model through the supplied
    ``save_fn``/``load_fn`` pair and verify it is unchanged."""
    model = Ivis(k=15, batch_size=16, epochs=2)
    X = datasets.load_iris().data

    model.fit(X)
    save_fn(model, model_filepath)
    reloaded = load_fn(model_filepath)

    # Embeddings must be bit-identical after the round trip.
    assert np.all(model.transform(X) == reloaded.transform(X))
    _validate_network_equality(model, reloaded)

    # The reloaded model must still support further training.
    reloaded.fit_transform(X)
Exemplo n.º 8
0
def test_regression():
    """With an 'mae' supervision metric Ivis must build a regression
    head: linear activation and a single output unit."""
    (x_train, y_train), (x_test, y_test) = boston_housing.load_data()

    regressor = Ivis(k=15,
                     batch_size=16,
                     epochs=5,
                     supervision_metric='mae')
    regressor.fit(x_train, y_train)

    # Exercise both the embedding and the supervised output paths.
    embeddings = regressor.transform(x_train)
    predictions = regressor.score_samples(x_train)

    # The supervised branch must use MAE with a linear 1-unit output.
    assert regressor.model_.loss['supervised'] == 'mae'
    assert regressor.model_.layers[-1].activation.__name__ == 'linear'
    assert regressor.model_.layers[-1].output_shape[-1] == 1
Exemplo n.º 9
0
def ivis_reduce(docvecs, label, ivis_model, use_nn, **kwargs):
    """Reduce document vectors to a single Ivis dimension.

    Args:
        docvecs: array-like of document vectors to reduce.
        label: labels aligned with ``docvecs``; -1 marks unlabeled rows.
            Assumed to be a pandas Series (``unique``/``value_counts``
            are used) — TODO confirm against callers.
        ivis_model: a pre-fitted Ivis model, or a falsy value to train
            a fresh one on ``docvecs``.
        use_nn: when falsy, skip reduction entirely.
        **kwargs: accepted for interface compatibility; currently unused.

    Returns:
        ``(decision_scores, ivis_model)`` — the 1-D float scores and the
        fitted model — or ``(docvecs, None)`` when ``use_nn`` is falsy.
    """
    # Guard clause: no neural reduction requested, pass data through.
    if not use_nn:
        return docvecs, None

    if not ivis_model:
        # FIX: was an f-string with no placeholders (flake8 F541).
        print("Train ivis...")
        ivis_model = Ivis(embedding_dims=1, k=15, model="maaten",
                          n_epochs_without_progress=15, verbose=0,
                          batch_size=128)
        # Fall back to unsupervised training when every label is -1.
        if -1 in label.unique() and label.value_counts()[-1] == label.shape[0]:
            print("No labeled data found.")
            ivis_model = ivis_model.fit(docvecs)
        else:
            ivis_model = ivis_model.fit(
                docvecs, Y=label.to_numpy())

    dim_reduced_vecs = ivis_model.transform(docvecs)
    decision_scores = dim_reduced_vecs.astype(float)
    return decision_scores, ivis_model
Exemplo n.º 10
0
def test_h5_file(h5_filepath):
    """Ivis must train directly on HDF5-backed matrices and produce
    embeddings of the expected shape for held-out rows."""
    n_rows, n_dims = 258, 32
    create_random_dataset(h5_filepath, n_rows, n_dims)

    # Slice the on-disk dataset into train (~20%) and test (~80%) parts.
    split = n_rows // 5
    X_train = HDF5Matrix(h5_filepath, 'data', start=0, end=split)
    y_train = HDF5Matrix(h5_filepath, 'labels', start=0, end=split)

    X_test = HDF5Matrix(h5_filepath, 'data', start=split, end=n_rows)
    y_test = HDF5Matrix(h5_filepath, 'labels', start=split, end=n_rows)

    # Train unsupervised and embed the held-out rows.
    reducer = Ivis(epochs=5, k=15, batch_size=16)
    reducer.fit_transform(X_train, shuffle_mode='batch')
    embeddings = reducer.transform(X_test)

    assert embeddings.shape[0] == len(X_test)
    assert embeddings.shape[1] == reducer.embedding_dims
Exemplo n.º 11
0
def _supervised_model_save_test(model_filepath, save_fn, load_fn):
    """Round-trip a supervised Ivis model through ``save_fn``/``load_fn``
    and verify outputs and network weights are preserved."""
    model = Ivis(k=15,
                 batch_size=16,
                 epochs=2,
                 supervision_metric='sparse_categorical_crossentropy')
    iris = datasets.load_iris()
    X, Y = iris.data, iris.target

    model.fit(X, Y)
    save_fn(model, model_filepath)
    reloaded = load_fn(model_filepath)

    # Embeddings and supervised predictions must match exactly.
    assert np.all(model.transform(X) == reloaded.transform(X))
    assert np.all(model.score_samples(X) == reloaded.score_samples(X))

    _validate_network_equality(model, reloaded)

    # The reloaded model must remain trainable.
    reloaded.fit_transform(X, Y)
Exemplo n.º 12
0
Arquivo: iris.py Projeto: yyht/ivis
"""
iris dataset
============

Example of reducing dimensionality of the iris dataset using ivis.
"""

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler

from ivis import Ivis

# Demo: embed the iris dataset in 2-D with ivis and plot the result.
sns.set(context='paper', style='white')

# Scale features to [0, 1] before training.
features = MinMaxScaler().fit_transform(load_iris().data)

reducer = Ivis(k=5, model='maaten', verbose=0)
reducer.fit(features)

embeddings = reducer.transform(features)

# Scatter the 2-D embedding coloured by species.
plt.figure(figsize=(5, 5), dpi=100)
plt.scatter(embeddings[:, 0], embeddings[:, 1], c=load_iris().target, s=20)
plt.xlabel('ivis 1')
plt.ylabel('ivis 2')
plt.title('ivis embeddings of the iris dataset')

plt.show()
Exemplo n.º 13
0
                    random_state=42,
                    verbose=True)
# Fit UMAP on the document vectors and project them.
umap_reducer = umap_reducer.fit(list(docvecs))
dim_reduced_vecs = umap_reducer.transform(list(docvecs))
# Without ivis, the UMAP projection itself becomes the outlier score.
if not use_ivis:
    decision_scores = dim_reduced_vecs.astype(float)

# %%
# Ivis
# Optionally compress the UMAP output further to a single ivis dimension.
if use_ivis:
    ivis_reducer = Ivis(embedding_dims=1,
                        k=15,
                        model="maaten",
                        n_epochs_without_progress=15)
    ivis_reducer = ivis_reducer.fit(dim_reduced_vecs)
    dim_reduced_vecs = ivis_reducer.transform(dim_reduced_vecs)
    decision_scores = dim_reduced_vecs.astype(float)

# %%
# Flag outliers whose score falls outside the IQR fence, then score
# the predictions against the ground-truth labels.
iqrout = IQROutlier(contamination=0.1)
iqrout = iqrout.fit(decision_scores)

preds = iqrout.transform(decision_scores)
scores = get_scores(dict(), df["outlier_label"], preds)
scores

# %%
# validate
# Load the held-out Amazon reviews for validation.
# NOTE(review): hard-coded local path — parameterise before reuse.
df_val = pd.read_csv("/home/philipp/projects/dad4td/data/raw/amazon.csv",
                     names=["stars", "head", "text"])