Exemplo n.º 1
0
def test_plot_nested_submodels(teardown, tmp_path, levels, expand_nested):
    """Plot a model whose only step is the result of `levels` nested builds.

    Starting from a plain ``LogisticRegression`` step, each iteration hands
    the previous result to ``build_submodel`` together with a 1-based level
    number. The final result is applied to one input/target pair and the
    resulting model is rendered to a PNG under ``tmp_path``.
    """
    outermost = LogisticRegression()
    for depth in range(1, levels + 1):
        # Each build receives the step produced by the previous iteration.
        outermost = build_submodel(outermost, depth)

    features = Input(name="x")
    targets = Input(name="y_t")
    prediction = outermost(features, targets)
    model = Model(features, prediction, targets)

    plot_model(
        model,
        str(tmp_path / "test_plot_model.png"),
        show=False,
        expand_nested=expand_nested,
    )
Exemplo n.º 2
0
def test_plot_shared_submodel(teardown, tmp_path, expand_nested):
    """Plot a model in which one submodel instance is applied twice.

    A single object returned by ``build_submodel`` is called on two separate
    input/target pairs, so the same step appears at two places in the graph.
    The model is rendered to ``tmp_path / PNG_FILENAME``.
    """
    shared = build_submodel(LogisticRegression(), 0)

    inputs, targets, outputs = [], [], []
    for idx in range(2):
        feature_node = Input(name="x{}".format(idx))
        target_node = Input(name="y_t{}".format(idx))
        prediction = shared(feature_node, target_node)
        inputs.append(feature_node)
        targets.append(target_node)
        outputs.append(prediction)

    model = Model(inputs, outputs, targets)

    plot_model(
        model,
        str(tmp_path / PNG_FILENAME),
        show=False,
        expand_nested=expand_nested,
    )
Exemplo n.º 3
0
def test_plot_independent_submodels(teardown, tmp_path, expand_nested):
    """Plot a model made of three branches that share no steps.

    Branch 0 is a plain ``LogisticRegression`` step; branches 1 and 2 are
    each a fresh ``build_submodel(LogisticRegression(), i)`` result. Every
    branch gets its own input and target node. The model is rendered to a
    PNG under ``tmp_path``.
    """
    xs, y_ts, y_ps = [], [], []

    for i in range(3):
        # A new step object is created per iteration, so no branch shares
        # a step with another.
        step = (LogisticRegression() if i == 0 else build_submodel(
            LogisticRegression(), i))
        x = Input(name="x{}".format(i))
        y_t = Input(name="y_t{}".format(i))
        y_p = step(x, y_t)
        xs.append(x)
        y_ts.append(y_t)
        y_ps.append(y_p)

    model = Model(xs, y_ps, y_ts)

    filename = str(tmp_path / "test_plot_model.png")
    plot_model(model, filename, show=False, expand_nested=expand_nested)
Exemplo n.º 4
0
def test_plot_model(teardown, tmp_path):
    """Plot a two-input, two-output model containing a nested submodel.

    The graph has two branches: ``x`` goes through ``DummySIMO``, the
    submodel and ``DummyMISO``; ``u`` goes through ``DummySISO`` only.
    The plot is written under ``tmp_path`` with nested models expanded.
    """
    # ------- Nested submodel: two inputs in, two outputs out
    sub_in_a = Input(name="x1")
    sub_in_b = Input(name="x2")
    sub_out_a, sub_out_b = DummyMIMO()([sub_in_a, sub_in_b])
    submodel = Model(
        [sub_in_a, sub_in_b],
        [sub_out_a, sub_out_b],
        name="submodel",
    )

    # ------- First branch: x -> DummySIMO -> submodel
    main_in = Input(name="x")
    fan_a, fan_b = DummySIMO()(main_in)
    nested_a, nested_b = submodel([fan_a, fan_b])

    # ------- Second branch: u -> DummySISO
    side_in = Input(name="u")
    side_out = DummySISO()(side_in)

    # ------- Merge the first branch and assemble the outer model
    merged = DummyMISO()([nested_a, nested_b])
    model = Model([main_in, side_in], [merged, side_out], name="main_model")

    plot_model(
        model,
        str(tmp_path / "test_plot_model.png"),
        show=False,
        expand_nested=True,
    )
Exemplo n.º 5
0
def test_plot_big_model(teardown, tmp_path, expand_nested):
    """Plot a deliberately contrived model built from three submodels.

    ``submodel2`` is applied to the main target, ``submodel3`` to the two
    ``DummySIMO`` outputs, and ``submodel1`` consumes the outputs of
    ``submodel3`` with the transformed target. The result is rendered to a
    PNG under ``tmp_path``.
    """
    # ------- Sub-model 1: two inputs, one target, two outputs
    sub1_in_a = Input(name="x1_sub1")
    sub1_in_b = Input(name="x2_sub1")
    sub1_target = Input(name="y_t_sub1")
    sub1_out_a, sub1_out_b = DummyMIMO()([sub1_in_a, sub1_in_b], sub1_target)
    submodel1 = Model(
        [sub1_in_a, sub1_in_b],
        [sub1_out_a, sub1_out_b],
        sub1_target,
        name="submodel1",
    )

    # ------- Sub-model 2: single input, single output, no target
    sub2_in = Input(name="y_t_sub2")
    sub2_out = DummySISO()(sub2_in)
    submodel2 = Model(sub2_in, sub2_out, name="submodel2")

    # ------- Sub-model 3: two inputs, two outputs, no target
    sub3_in_a = Input(name="x1_sub3")
    sub3_in_b = Input(name="x2_sub3")
    sub3_out_a, sub3_out_b = DummyMIMO()([sub3_in_a, sub3_in_b])
    submodel3 = Model(
        [sub3_in_a, sub3_in_b],
        [sub3_out_a, sub3_out_b],
        name="submodel3",
    )

    # ------- Main model wiring
    main_in = Input(name="x")
    main_target = Input(name="y_t")
    target_transformed = submodel2(main_target)
    fan_a, fan_b = DummySIMO()(main_in)
    mid_a, mid_b = submodel3([fan_a, fan_b])
    top_a, top_b = submodel1([mid_a, mid_b], target_transformed)
    merged = DummyMISO()([top_a, top_b])

    model = Model(main_in, merged, main_target, name="main_model")

    plot_model(
        model,
        str(tmp_path / "test_plot_model.png"),
        show=False,
        expand_nested=expand_nested,
    )
Exemplo n.º 6
0
# Classifier-chain fragment: `x`, `y_t`, `n_targets`, `order` and the train/test
# arrays are defined outside this excerpt.

# One Lambda step wrapping np.squeeze(axis=1); the same step object is applied
# to every per-target node inside the loop below.
squeeze = Lambda(np.squeeze, axis=1)

# Split the target node along axis 1.
# NOTE(review): presumably yields n_targets single-column nodes — confirm.
ys_t = Split(n_targets, axis=1)(y_t)
ys_p = []  # predictions, appended in chain order (the order given by `order`)
for j, k in enumerate(order):
    # At iteration j, ys_p holds exactly j entries, so ys_p[:j] is all of them:
    # each classifier sees x plus every prediction made earlier in the chain.
    x_stacked = ColumnStack()(inputs=[x, *ys_p[:j]])
    # Replace the k-th split target in place with its squeezed version.
    ys_t[k] = squeeze(ys_t[k])
    ys_p.append(LogisticRegression(solver="lbfgs")(x_stacked, ys_t[k]))

# Reorder from chain order back to target-index order: the prediction for
# target j sits at the position where j appears in `order`.
ys_p = [ys_p[order.index(j)] for j in range(n_targets)]
y_p = ColumnStack()(ys_p)

model = Model(inputs=x, outputs=y_p, targets=y_t)
# This might take a few seconds
plot_model(model, filename="classifier_chain.png", dpi=96)

# ------- Train model
model.fit(X_train, Y_train)

# ------- Evaluate model
Y_train_pred = model.predict(X_train)
Y_test_pred = model.predict(X_test)

# Sample-averaged Jaccard score on the training split.
print(
    "Jaccard score on train data:",
    jaccard_score(Y_train, Y_train_pred, average="samples"),
)
print(
    "Jaccard score on test data:",
    jaccard_score(Y_test, Y_test_pred, average="samples"),
Exemplo n.º 7
0
# 2. Build the model
# Two feature inputs plus one target input shared by every trainable step.
x1 = Input(name="x1")
x2 = Input(name="x2")
y_t = Input(name="y_t")

# Three first-level classifiers: one on x1, one on x2, and one on a
# PowerTransformer -> PCA transformation of x2. The two transformers are
# called without a target; the classifiers all receive y_t.
y1 = ExtraTreesClassifier()(x1, y_t)
y2 = RandomForestClassifier()(x2, y_t)
z = PowerTransformer()(x2)
z = PCA()(z)
y3 = LogisticRegression()(z, y_t)

# The three first-level outputs are combined and fed to a final SVC.
stacked_features = Stack()([y1, y2, y3])
y_p = SVC()(stacked_features, y_t)

model = Model([x1, x2], y_p, y_t)
# Writes a diagram of the model graph to the given file.
plot_model(model,
           filename="multiple_input_nonlinear_pipeline_example_plot.png")

# 3. Train the model
dataset = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(dataset.data,
                                                    dataset.target,
                                                    random_state=0)

# Let's suppose the dataset is originally split in two
# (the first 15 columns feed x1, the remaining columns feed x2)
X1_train, X2_train = X_train[:, :15], X_train[:, 15:]
X1_test, X2_test = X_test[:, :15], X_test[:, 15:]

# Inputs are passed as a list in the same order as in Model([x1, x2], ...).
model.fit([X1_train, X2_train], y_train)

# 4. Use the model
y_test_pred = model.predict([X1_test, X2_test])
Exemplo n.º 8
0
import sklearn.svm
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from baikal import Input, Model, make_step
from baikal.plot import plot_model

# 1. Define a step
# make_step turns the scikit-learn class into a step class; the module-level
# name SVC below refers to that step class, not to sklearn.svm.SVC itself.
SVC = make_step(sklearn.svm.SVC)

# 2. Build the model
x = Input()    # features
y_t = Input()  # targets
# Instantiate the step with its hyperparameters, then call it on the
# feature and target nodes to obtain the prediction node.
y_p = SVC(C=1.0, kernel="rbf", gamma=0.5)(x, y_t)

# Positional order is (inputs, outputs, targets).
model = Model(x, y_p, y_t)
plot_model(model, filename="readme_quick_example.png")

# 3. Train the model
dataset = load_breast_cancer()
# random_state=0 keeps the train/test split reproducible.
X_train, X_test, y_train, y_test = train_test_split(dataset.data,
                                                    dataset.target,
                                                    random_state=0)

model.fit(X_train, y_train)

# 4. Use the model
y_test_pred = model.predict(X_test)
Exemplo n.º 9
0
# Stacked-classifiers fragment: the imports and the LogisticRegression /
# RandomForestClassifier step definitions are outside this excerpt.
ExtraTreesClassifier = make_step(sklearn.ensemble.ExtraTreesClassifier)

# ------- Load dataset
data = sklearn.datasets.load_breast_cancer()
# The labels are bound to `y` so they do not share a name with the model's
# output node `y_p` defined further down.
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# ------- Build model
x = Input()
y_t = Input()
# The two first-level classifiers are configured with function="predict_proba"
# and both receive the same features and targets.
y_p1 = LogisticRegression(function="predict_proba")(x, y_t)
y_p2 = RandomForestClassifier(function="predict_proba")(x, y_t)
# Their outputs are concatenated and used as features for the final classifier.
ensemble_features = Concatenate()([y_p1, y_p2])
y_p = ExtraTreesClassifier()(ensemble_features, y_t)

model = Model(x, y_p, y_t)
plot_model(model, filename="stacked_classifiers.png", dpi=96)

# ------- Train model
model.fit(X_train, y_train)

# ------- Evaluate model
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

print("F1 score on train data:", f1_score(y_train, y_train_pred))
print("F1 score on test data:", f1_score(y_test, y_test_pred))
Exemplo n.º 10
0
# Transformed-target fragment: imports are outside this excerpt.
# NOTE(review): load_boston is believed to be unavailable in recent
# scikit-learn releases — confirm against the pinned version before running.
dataset = load_boston()
# Boolean mask selecting the "DIS" feature column, which is used as the
# regression target; every other column stays in X.
target = np.array(dataset.feature_names) == "DIS"
X = dataset.data[:, np.logical_not(target)]
y = dataset.data[:, target].squeeze()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# ------- Build model
# A single transformer step instance, called twice below: once on the
# targets and once, with compute_func="inverse_transform", on the predictions.
transformer = QuantileTransformer(n_quantiles=300,
                                  output_distribution="normal")

x = Input()
y_t = Input()
# QuantileTransformer requires an explicit feature dimension, hence the Lambda step
y_t_trans = Lambda(np.reshape, newshape=(-1, 1))(y_t)
y_t_trans = transformer(y_t_trans)
# The regressor is trained against the transformed targets.
y_p_trans = RidgeCV()(x, y_t_trans)
# Second call on the same step object, this time with trainable=False and
# the inverse transform as compute function, mapping predictions back.
y_p = transformer(y_p_trans, compute_func="inverse_transform", trainable=False)

model = Model(x, y_p, y_t)
plot_model(model, filename="transformed_target.png", dpi=96)

# ------- Train model
model.fit(X_train, y_train)

# ------- Evaluate model
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
# Note: the value printed as "MAE" is the *median* absolute error.
mae = median_absolute_error(y_test, y_pred)
print("R^2={}\nMAE={}".format(r2, mae))
Exemplo n.º 11
0
# Classifier-chain fragment: `n_targets`, `order` and the train/test arrays
# are defined outside this excerpt.
# ------- Build model
x = Input()
y_t = Input()

# Split the target node along axis 1.
# NOTE(review): presumably yields n_targets single-column nodes — confirm.
ys_t = Split(n_targets, axis=1)(y_t)
ys_p = []  # predictions, appended in chain order (the order given by `order`)
for j, k in enumerate(order):
    # At iteration j, ys_p holds exactly j entries, so ys_p[:j] is all of them:
    # each classifier sees x plus every prediction made earlier in the chain.
    x_stacked = ColumnStack()(inputs=[x, *ys_p[:j]])
    # A new Lambda step wrapping np.squeeze(axis=1) is created per target and
    # its output replaces the k-th split target in place.
    ys_t[k] = Lambda(np.squeeze, axis=1)(ys_t[k])
    ys_p.append(LogisticRegression(solver="lbfgs")(x_stacked, ys_t[k]))

# Reorder from chain order back to target-index order: the prediction for
# target j sits at the position where j appears in `order`.
ys_p = [ys_p[order.index(j)] for j in range(n_targets)]
y_p = ColumnStack()(ys_p)

model = Model(inputs=x, outputs=y_p, targets=y_t)
plot_model(model, filename="classifier_chain.png",
           dpi=96)  # This might take a few seconds

# ------- Train model
model.fit(X_train, Y_train)

# ------- Evaluate model
Y_train_pred = model.predict(X_train)
Y_test_pred = model.predict(X_test)

# Sample-averaged Jaccard score on the training split.
print(
    "Jaccard score on train data:",
    jaccard_score(Y_train, Y_train_pred, average="samples"),
)
print(
    "Jaccard score on test data:",
    jaccard_score(Y_test, Y_test_pred, average="samples"),