Example #1
def test_arc_cosine(make_data):
    """Test the random Arc Cosine kernel."""
    S = 3
    x, _, _ = make_data
    x_, X_ = _make_placeholders(x, S)

    F, KL = ab.RandomArcCosine(n_features=10)(X_)

    tc = tf.test.TestCase()
    with tc.test_session():
        f = F.eval(feed_dict={x_: x})

        assert f.shape == (S, x.shape[0], 10)
        assert KL == 0
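The _make_placeholders helper is a fixture from the test suite and is not shown above. A minimal sketch of what it plausibly does, assuming it builds a single-batch placeholder and tiles it over the S Monte Carlo samples:

import tensorflow as tf


def _make_placeholders(x, S):
    """Hypothetical sketch of the test fixture; the real one may differ."""
    # Placeholder for one batch of data, shape (N, D)
    x_ = tf.placeholder(tf.float32, [None, x.shape[1]])
    # Tile to (S, N, D) so the layer sees S samples of the batch
    X_ = tf.tile(tf.expand_dims(x_, 0), [S, 1, 1])
    return x_, X_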
Example #2
def main():
    """Run the demo."""
    # Get continuous and categorical data
    df_train, df_test = fetch_data()
    df = pd.concat((df_train, df_test))
    X_con, X_cat, n_cats, Y = input_fn(df)

    # Define the continuous layers
    con_layer = (ab.InputLayer(name='con', n_samples=T_SAMPLES) >>
                 ab.DenseVariational(output_dim=5, full=True))

    # Now define the categorical layers, which we embed
    # Note: each EmbedVariational call could be configured differently;
    # identical settings here just keep the example short
    cat_layer_list = [ab.EmbedVariational(EMBED_DIMS, i) for i in n_cats]
    cat_layer = (
        ab.InputLayer(name='cat', n_samples=T_SAMPLES) >> ab.PerFeature(
            *cat_layer_list)  # Assign each column to its own embedding layer
    )

    # Now we can feed the initial continuous and categorical layers to further
    # "joint" layers after we concatenate them
    net = ab.stack(ab.Concat(con_layer, cat_layer),
                   ab.RandomArcCosine(100, 1.),
                   ab.DenseVariational(output_dim=1, full=True))

    # Split data into training and testing
    Xt_con, Xs_con = np.split(X_con, [len(df_train)], axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, [len(df_train)], axis=0)
    Yt, Ys = np.split(Y, [len(df_train)], axis=0)

    # Graph placeholders
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # Feed dicts
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat}

    # Make model
    N = len(Xt_con)
    nn, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=nn)

    # Build the evidence lower bound (ELBO) loss over the N training points
    loss = ab.elbo(likelihood, Y_, N, kl)
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # We're just going to use a feed_dict to feed in mini-batches, which
        # we generate here
        batches = ab.batch(train_dict, batch_size=BSIZE, n_iter=NITER)

        for i, data in enumerate(batches):
            train.run(feed_dict=data)
            if i % 1000 == 0:
                loss_val = loss.eval(feed_dict=data)
                print("Iteration {}, loss = {}".format(i, loss_val))

        # Predict
        Ep = ab.predict_expected(nn, test_dict, P_SAMPLES)

    Ey = Ep > 0.5  # Max probability assignment

    acc = accuracy_score(Ys.flatten(), Ey.flatten())
    logloss = log_loss(Ys.flatten(), np.hstack((1 - Ep, Ep)))

    print("Accuracy = {}, log loss = {}".format(acc, logloss))
Example #3
NEPOCHS = 5  # Number of times to see the data in training
BSIZE = 100  # Mini batch size
CONFIG = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU only (no GPU)
LSAMPLES = 5  # Number of samples the model returns
PSAMPLES = 10  # This will give LSAMPLES * PSAMPLES predictions

NCLASSES = 7  # Number of target classes
NFEATURES = 100  # Number of random features to use

# Network construction
data_input = ab.InputLayer(name='X', n_samples=LSAMPLES)  # Data input
mask_input = ab.MaskInputLayer(name='M')  # Missing data mask input

# Learnable, positive length scale per input dimension (covtype has 54)
lenscale = ab.pos(tf.Variable(np.ones((54, 1), dtype=np.float32)))

layers = (ab.RandomArcCosine(n_features=NFEATURES, lenscale=lenscale) >>
          ab.DenseVariational(output_dim=NCLASSES))
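# Note: the composition above approximates a Gaussian process classifier.
# RandomArcCosine draws random features approximating the arc-cosine
# kernel, and DenseVariational is a Bayesian linear layer over them.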


def main():
    """Run the imputation demo."""
    # Fetch the data, zero-base the targets and standardise the features
    data = fetch_covtype()
    X = data.data
    Y = data.target - 1  # class labels in [0, NCLASSES)
    X = StandardScaler().fit_transform(X)

    # Now fake some missing data with a mask
    rnd = np.random.RandomState(RSEED)
    mask = rnd.rand(*X.shape) < FRAC_MISSING
    X[mask] = MISSING_VAL
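Example #3 is truncated here, before the mask is wired into the network, and it references imports and constants defined elsewhere. The imports below follow directly from the names used; the constant values and the continuation are illustrative sketches only, assuming Aboleth's MeanImpute layer and hypothetical placeholders X_ and M_:

import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_covtype
from sklearn.preprocessing import StandardScaler

import aboleth as ab

# Illustrative stand-ins for constants the snippet references but never shows
RSEED = 666          # seed for the synthetic missing-data mask
FRAC_MISSING = 0.2   # fraction of entries marked as missing
MISSING_VAL = -9999  # sentinel written into masked entries

# Sketch only: a plausible continuation that mean-imputes the masked entries
# and then applies the random-feature layers defined above
net = ab.MeanImpute(data_input, mask_input) >> layers

X_ = tf.placeholder(tf.float32, [None, 54])  # data placeholder (54 features)
M_ = tf.placeholder(tf.bool, [None, 54])     # missing-data mask placeholder
nn, kl = net(X=X_, M=M_)                     # inputs are routed by layer name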