def test_tm_njobs():
    """Check that ``n_jobs`` and ``tm_n_jobs`` keep their own values.

    A model is built with ``tm_n_jobs=2`` and ``n_jobs=1``, fitted and used
    for prediction; afterwards both attributes must still hold the values
    passed to the constructor.
    """
    texts, labels = get_data()
    unfitted = EvoMSA(
        tm_n_jobs=2,
        n_jobs=1,
        TH=True,
        lang="es",
        stacked_method="sklearn.svm.LinearSVC",
    )
    # Everything below is checked on the object returned by ``fit``.
    model = unfitted.fit(texts, labels)
    model.predict(texts)
    assert model.n_jobs == 1
    assert model.tm_n_jobs == 2
def test_cache():
    """Exercise the on-disk cache used by ``EvoMSA`` for training and prediction.

    The test builds a text model file through ``StoreDelete``, trains with a
    cache prefix under ``tm/`` and asserts that the expected cache files show
    up on disk, first for the training prefix and then for a second prefix
    passed to ``predict``.
    """
    import hashlib

    def func(data, output):
        """Fit a ``TextModel`` on the tweets read from *data* and save it to *output*."""
        from b4msa.textmodel import TextModel
        from microtc.utils import tweet_iterator, save_model
        tm = TextModel().fit(list(tweet_iterator(data)))
        save_model(tm, output)

    # NOTE(review): StoreDelete presumably calls func(TWEETS, <output>) on
    # entry and removes the file on exit - confirm against its definition.
    # The whole test stays inside the block because sd._output is reused below.
    with StoreDelete(func, TWEETS, "textmodel_cache.tm") as sd:
        cache = os.path.join("tm", "train.json")
        evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                     cache=cache)
        # Constructing the model with a cache path must create the directory.
        assert os.path.isdir("tm")
        # Expected cache name: the cache prefix, a dash and the md5 hex digest
        # of the text model path.
        output = hashlib.md5(sd._output.encode()).hexdigest()
        output = cache + "-%s" % output
        print(evo.cache.textModels)
        # The entry at index 1 is compared with the name derived above.
        assert evo.cache.textModels[1] == output
        X, y = get_data()
        evo.first_stage(X, y)
        # After first_stage the cache file must exist on disk.
        assert os.path.isfile(output)
        # Remember the cache paths reported for this instance so they can be
        # checked once a full fit has run.
        ML = list(evo.cache.ml_train())
        ML_K = list(evo.cache.ml_kfold())
        evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                     stacked_method_args=dict(popsize=10,
                                              early_stopping_rounds=10,
                                              n_estimators=3),
                     cache=cache).fit(X, y, test_set=X[:30])
        hy = evo.predict(X[:10])
        print(len(hy), hy)
        assert len(hy) == 10
        # Every path collected before the fit must now be a file.
        for k in ML:
            print(k)
            assert os.path.isfile(k)
        for k in ML_K:
            print(k)
            assert os.path.isfile(k)
        # Predicting with a different cache prefix must write a file that
        # reuses the same digest suffix.
        cache = os.path.join("tm", "test")
        evo.predict(X, cache=cache)
        # output.split("-")[1] is the md5 digest appended to the training prefix.
        output = cache + '-' + output.split("-")[1]
        print(output)
        assert os.path.isfile(output)
def test_EvoMSA_evodag_class():
    """Check that ``stacked_method`` selects the stacking estimator class.

    The model is built with ``stacked_method`` set to the dotted path of
    ``NearestCentroid``; the fitted ``_evodag_model`` must be an instance of
    that class, and ``predict`` must agree with the arg-max of
    ``predict_proba`` decoded through the label encoder.

    NOTE(review): another function with this exact name is defined further
    down in this source. Inside a single module the later definition rebinds
    the name, so pytest would collect only that one - confirm and rename one
    of the two.
    """
    from sklearn.neighbors import NearestCentroid
    import numpy as np

    texts, labels = get_data()
    settings = dict(
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']],
        stacked_method="sklearn.neighbors.NearestCentroid",
        TR=False,
        n_jobs=2,
    )
    model = EvoMSA(**settings).fit(texts, labels)
    assert isinstance(model._evodag_model, NearestCentroid)

    predicted = model.predict(texts)
    scores = model.predict_proba(texts)
    # Decode the highest-scoring column back to a label for comparison.
    best_column = scores.argmax(axis=1)
    decoded = model._le.inverse_transform(best_column)
    print(predicted, decoded)
    assert np.all(predicted == decoded)
def test_EvoMSA_predict():
    """Fit on the sample data and check prediction size and training accuracy.

    The test asserts that ``predict`` returns 1000 labels and that more than
    80% of them match the labels the model was fitted on.

    NOTE(review): another function with this exact name is defined further
    down in this source. Inside a single module the later definition rebinds
    the name, so pytest would collect only that one - confirm and rename one
    of the two.
    """
    import numpy as np

    texts, labels = get_data()
    stacking_options = dict(
        popsize=10,
        early_stopping_rounds=10,
        time_limit=15,
        n_estimators=10,
    )
    model = EvoMSA(
        stacked_method_args=stacking_options,
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']],
        n_jobs=1,
    ).fit(texts, labels)

    predicted = model.predict(texts)
    assert len(predicted) == 1000
    # Fraction of predictions equal to the labels used for fitting.
    accuracy = (np.array(labels) == predicted).mean()
    print(accuracy, predicted)
    print(model.predict_proba(texts))
    assert accuracy > 0.8
def test_binary_labels_json():
    """Check that predictions on binary labels can be dumped to JSON.

    The sample labels are mapped to 0/1 (only ``P`` maps to 1), a model is
    fitted on them, and every prediction is converted with ``str`` and
    serialised through ``json.dumps``.
    """
    import json

    texts, labels = get_data()
    to_binary = {"NONE": 0, "N": 0, "NEU": 0, "P": 1}
    binary_labels = [to_binary[label] for label in labels]
    model = EvoMSA(
        evodag_args=dict(
            popsize=10,
            early_stopping_rounds=10,
            time_limit=5,
            n_estimators=5,
        ),
        n_jobs=2,
    ).fit(texts, binary_labels)

    predictions = model.predict(texts)
    for prediction in predictions:
        print(type(prediction), str(prediction))
        payload = json.dumps({"klass": str(prediction)})
    # Only the payload of the last prediction is echoed.
    print(payload)
def test_EvoMSA_predict():
    """Fit on two training sets at once and check size and training accuracy.

    ``fit`` receives a list with the full sample and the subset whose label
    is ``P`` or ``N``, together with the matching list of label sequences.
    The test asserts that ``predict`` returns 1000 labels for the full sample
    and that more than 80% of them match its labels.

    NOTE(review): the classifier path is spelled 'EvoMSA.model.Bernulli' here
    while other tests in this source use 'EvoMSA.model.Bernoulli' - confirm
    which spelling the package exports.
    """
    import numpy as np

    texts, labels = get_data()
    polar = ['P', 'N']
    polar_texts = [text for text, label in zip(texts, labels) if label in polar]
    polar_labels = [label for label in labels if label in polar]

    model = EvoMSA(
        evodag_args=dict(
            popsize=10,
            early_stopping_rounds=10,
            time_limit=15,
            n_estimators=10,
        ),
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
        n_jobs=1,
    ).fit([texts, polar_texts], [labels, polar_labels])

    predicted = model.predict(texts)
    assert len(predicted) == 1000
    # Fraction of predictions equal to the labels of the full sample.
    accuracy = (np.array(labels) == predicted).mean()
    print(accuracy, predicted)
    print(model.predict_proba(texts))
    assert accuracy > 0.8
def test_EvoMSA_identity():
    """Check that ``evodag_class`` can select ``EvoMSA.model.Identity``.

    The fitted ``_evodag_model`` must be an ``Identity`` instance, and
    ``predict`` must agree with the arg-max of ``predict_proba`` decoded
    through the label encoder.

    NOTE(review): the classifier path is spelled 'EvoMSA.model.Bernulli' here
    while other tests in this source use 'EvoMSA.model.Bernoulli' - confirm
    which spelling the package exports.
    """
    from EvoMSA.model import Identity
    import numpy as np

    texts, labels = get_data()
    settings = dict(
        evodag_args=dict(popsize=10, early_stopping_rounds=10, n_estimators=3),
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
        TR=False,
        evodag_class="EvoMSA.model.Identity",
        n_jobs=2,
    )
    model = EvoMSA(**settings).fit(texts, labels)
    assert isinstance(model._evodag_model, Identity)

    predicted = model.predict(texts)
    scores = model.predict_proba(texts)
    # Decode the highest-scoring column back to a label for comparison.
    best_column = scores.argmax(axis=1)
    decoded = model._le.inverse_transform(best_column)
    print(predicted, decoded)
    assert np.all(predicted == decoded)
def test_EvoMSA_evodag_class():
    """Check that ``evodag_class`` accepts the dotted path of an estimator.

    The model is built with ``evodag_class`` pointing at ``NearestCentroid``;
    the fitted ``_evodag_model`` must be an instance of that class, and
    ``predict`` must agree with the arg-max of ``predict_proba`` decoded
    through the label encoder.
    """
    from sklearn.neighbors import NearestCentroid
    import numpy as np

    texts, labels = get_data()
    settings = dict(
        evodag_args=dict(popsize=10, early_stopping_rounds=10, n_estimators=3),
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
        evodag_class="sklearn.neighbors.NearestCentroid",
        TR=False,
        n_jobs=2,
    )
    model = EvoMSA(**settings).fit(texts, labels)
    assert isinstance(model._evodag_model, NearestCentroid)

    predicted = model.predict(texts)
    scores = model.predict_proba(texts)
    # Decode the highest-scoring column back to a label for comparison.
    best_column = scores.argmax(axis=1)
    decoded = model._le.inverse_transform(best_column)
    print(predicted, decoded)
    assert np.all(predicted == decoded)
def test_EvoMSA_regression():
    """Check output shapes when the model is built with ``classifier=False``.

    The sample labels are encoded with ``LabelEncoderWrapper`` and shifted by
    -1.5 to obtain numeric targets. Both ``decision_function`` and
    ``predict`` must return a one-dimensional result with one entry per
    input.
    """
    from EvoMSA.base import LabelEncoderWrapper
    from EvoMSA.utils import download

    raw_texts, labels = get_data()
    # Inputs are passed as dictionaries carrying the text under "text".
    records = [{"text": text} for text in raw_texts]
    encoder = LabelEncoderWrapper().fit(labels)
    targets = encoder.transform(labels) - 1.5

    model = EvoMSA(
        stacked_method_args=dict(
            popsize=10,
            early_stopping_rounds=10,
            time_limit=5,
            n_estimators=2,
        ),
        classifier=False,
        models=[[download("emo_Es.tm"), 'EvoMSA.model.Identity']],
        TR=False,
        n_jobs=1,
    ).fit(records, targets)
    assert model

    decision = model.decision_function(records)
    print(decision.shape, decision.ndim)
    assert decision.shape[0] == len(records)
    assert decision.ndim == 1

    predicted = model.predict(records)
    assert predicted.shape[0] == len(records)
    assert predicted.ndim == 1