Example No. 1
def dfm_criteo(dense_input, sparse_input, y_):
    feature_dimension = 33762577
    embedding_size = 128
    learning_rate = 0.01

    # FM
    Embedding1 = init.random_normal([feature_dimension, 1],
                                    stddev=0.01,
                                    name="fst_order_embedding",
                                    ctx=ndarray.cpu(0))
    FM_W = init.random_normal([13, 1], stddev=0.01, name="dense_parameter")
    sparse_1dim_input = ad.embedding_lookup_op(Embedding1,
                                               sparse_input,
                                               ctx=ndarray.cpu(0))
    fm_dense_part = ad.matmul_op(dense_input, FM_W)
    fm_sparse_part = ad.reduce_sum_op(sparse_1dim_input, axes=1)
    # first-order output
    y1 = fm_dense_part + fm_sparse_part

    Embedding2 = init.random_normal([feature_dimension, embedding_size],
                                    stddev=0.01,
                                    name="snd_order_embedding",
                                    ctx=ndarray.cpu(0))
    sparse_2dim_input = ad.embedding_lookup_op(Embedding2,
                                               sparse_input,
                                               ctx=ndarray.cpu(0))
    # FM second-order term: 0.5 * ((sum of embeddings)^2 - sum of squared embeddings)
    sparse_2dim_sum = ad.reduce_sum_op(sparse_2dim_input, axes=1)
    sparse_2dim_sum_square = ad.mul_op(sparse_2dim_sum, sparse_2dim_sum)

    sparse_2dim_square = ad.mul_op(sparse_2dim_input, sparse_2dim_input)
    sparse_2dim_square_sum = ad.reduce_sum_op(sparse_2dim_square, axes=1)
    sparse_2dim = sparse_2dim_sum_square + -1 * sparse_2dim_square_sum
    sparse_2dim_half = sparse_2dim * 0.5
    # second-order output
    y2 = ad.reduce_sum_op(sparse_2dim_half, axes=1, keepdims=True)

    # DNN component
    flatten = ad.array_reshape_op(sparse_2dim_input, (-1, 26 * embedding_size))
    W1 = init.random_normal([26 * embedding_size, 256], stddev=0.01, name="W1")
    W2 = init.random_normal([256, 256], stddev=0.01, name="W2")
    W3 = init.random_normal([256, 1], stddev=0.01, name="W3")

    fc1 = ad.matmul_op(flatten, W1)
    relu1 = ad.relu_op(fc1)
    fc2 = ad.matmul_op(relu1, W2)
    relu2 = ad.relu_op(fc2)
    y3 = ad.matmul_op(relu2, W3)

    # combine the first-order, second-order, and DNN outputs
    y4 = y1 + y2
    y = y4 + y3
    y = ad.sigmoid_op(y)

    loss = ad.binarycrossentropy_op(y, y_)
    loss = ad.reduce_mean_op(loss, [0])
    opt = optimizer.SGDOptimizer(learning_rate=learning_rate)
    train_op = opt.minimize(loss)

    return loss, y, y_, train_op
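
For context, a minimal driver sketch (not part of the original example) showing how dfm_criteo might be wired into an executor. It assumes Criteo-style batches with 13 dense features and 26 sparse slots; the batch construction, device placement of the sparse ids, and feed_dict conventions are assumptions based on the other examples on this page.

import numpy as np

ctx = ndarray.gpu(0)
dense_input = ad.Variable(name="dense_input")
sparse_input = ad.Variable(name="sparse_input")
y_ = ad.Variable(name="y_")
loss, y, y_, train_op = dfm_criteo(dense_input, sparse_input, y_)
executor = ad.Executor([loss, y, train_op], ctx=ctx)

dense_batch = np.random.rand(128, 13).astype(np.float32)           # 13 dense features
sparse_batch = np.random.randint(0, 1000, (128, 26))               # 26 sparse feature ids (dtype handling is framework-specific)
label_batch = np.random.randint(0, 2, (128, 1)).astype(np.float32)
loss_val, y_val, _ = executor.run(feed_dict={
    dense_input: ndarray.array(dense_batch, ctx=ctx),
    sparse_input: ndarray.array(sparse_batch, ctx=ndarray.cpu(0)),  # embeddings are placed on CPU above
    y_: ndarray.array(label_batch, ctx=ctx),
})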
Example No. 2
def neural_mf(user_input, item_input, y_, num_users, num_items):
    batch_size = 256
    embed_dim = 8
    layers = [64, 32, 16, 8]
    learning_rate = 0.01

    User_Embedding = init.random_normal(
        (num_users, embed_dim + layers[0] // 2),
        stddev=0.01,
        name="user_embed",
        ctx=ndarray.cpu(0))
    Item_Embedding = init.random_normal(
        (num_items, embed_dim + layers[0] // 2),
        stddev=0.01,
        name="item_embed",
        ctx=ndarray.cpu(0))
    # MLP_User_Embedding = init.random_normal((num_users, layers[0] // 2), stddev=0.01, name="mlp_user_embed", ctx=ndarray.cpu(0))
    # MLP_Item_Embedding = init.random_normal((num_items, layers[0] // 2), stddev=0.01, name="mlp_item_embed", ctx=ndarray.cpu(0))

    user_latent = ad.embedding_lookup_op(User_Embedding,
                                         user_input,
                                         ctx=ndarray.cpu(0))
    item_latent = ad.embedding_lookup_op(Item_Embedding,
                                         item_input,
                                         ctx=ndarray.cpu(0))

    mf_user_latent = ad.slice_op(user_latent, (0, 0), (-1, embed_dim))
    mlp_user_latent = ad.slice_op(user_latent, (0, embed_dim), (-1, -1))
    mf_item_latent = ad.slice_op(item_latent, (0, 0), (-1, embed_dim))
    mlp_item_latent = ad.slice_op(item_latent, (0, embed_dim), (-1, -1))

    # mf_user_latent = ad.embedding_lookup_op(MF_User_Embedding, user_input, ctx=ndarray.cpu(0))
    # mf_item_latent = ad.embedding_lookup_op(MF_Item_Embedding, item_input, ctx=ndarray.cpu(0))
    # mlp_user_latent = ad.embedding_lookup_op(MLP_User_Embedding, user_input, ctx=ndarray.cpu(0))
    # mlp_item_latent = ad.embedding_lookup_op(MLP_Item_Embedding, item_input, ctx=ndarray.cpu(0))

    W1 = init.random_normal((layers[0], layers[1]), stddev=0.1, name='W1')
    W2 = init.random_normal((layers[1], layers[2]), stddev=0.1, name='W2')
    W3 = init.random_normal((layers[2], layers[3]), stddev=0.1, name='W3')
    W4 = init.random_normal((embed_dim + layers[3], 1), stddev=0.1, name='W4')

    # GMF branch: element-wise product of the MF latent factors
    mf_vector = ad.mul_op(mf_user_latent, mf_item_latent)
    # MLP branch: concatenation of the MLP latent factors
    mlp_vector = ad.concat_op(mlp_user_latent, mlp_item_latent, axis=1)
    fc1 = ad.matmul_op(mlp_vector, W1)
    relu1 = ad.relu_op(fc1)
    fc2 = ad.matmul_op(relu1, W2)
    relu2 = ad.relu_op(fc2)
    fc3 = ad.matmul_op(relu2, W3)
    relu3 = ad.relu_op(fc3)
    concat_vector = ad.concat_op(mf_vector, relu3, axis=1)
    y = ad.matmul_op(concat_vector, W4)
    y = ad.sigmoid_op(y)
    loss = ad.binarycrossentropy_op(y, y_)
    loss = ad.reduce_mean_op(loss, [0])
    opt = optimizer.SGDOptimizer(learning_rate=learning_rate)
    # opt = optimizer.AdamOptimizer(learning_rate=learning_rate)
    train_op = opt.minimize(loss)
    return loss, y, train_op
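
A hypothetical usage sketch for neural_mf (not from the original source). The user/item counts are illustrative, and the feed_dict convention follows the other examples on this page; inputs are assumed to be integer id batches of shape (N, 1).

import numpy as np

ctx = ndarray.gpu(0)
user_input = ad.Variable(name="user_input")
item_input = ad.Variable(name="item_input")
y_ = ad.Variable(name="y_")
loss, y, train_op = neural_mf(user_input, item_input, y_, num_users=6040, num_items=3706)
executor = ad.Executor([loss, y, train_op], ctx=ctx)

users = np.random.randint(0, 6040, (256, 1))
items = np.random.randint(0, 3706, (256, 1))
labels = np.random.randint(0, 2, (256, 1)).astype(np.float32)
loss_val, y_val, _ = executor.run(feed_dict={
    user_input: ndarray.array(users, ctx=ndarray.cpu(0)),   # embeddings are placed on CPU above
    item_input: ndarray.array(items, ctx=ndarray.cpu(0)),
    y_: ndarray.array(labels, ctx=ctx),
})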
Example No. 3
def train_hetu(args):
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = int(os.environ["WORKER_ID"])
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ctx = ndarray.gpu(rank)

    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")
    gcn1 = GraphSage(meta["feature"], hidden_layer_size, activation="relu", dropout=0.1)
    gcn2 = GraphSage(2*hidden_layer_size, hidden_layer_size, activation="relu", dropout=0.1)

    x = gcn1(x_)
    x = gcn2(x)
    W = initializers.xavier_uniform(shape=(2*hidden_layer_size, meta["class"]))
    B = initializers.zeros(shape=(meta["class"],))
    x = ad.matmul_op(x, W)
    y = x + ad.broadcastto_op(B, x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.mul_op(loss, mask_)
    loss = ad.reduce_mean_op(loss, [0])
    opt = optimizer.SGDOptimizer(0.1)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx, comm_mode='PS')
    distributed.ps_init(rank, nrank)

    batch_size = 4000
    with DistributedGraphSageSampler(args.path, batch_size, 2, 2, rank=rank, nrank=nrank) as sampler:
        epoch = 0
        nnodes = 0
        start = time.time()
        while True:
            g_sample, mask = sampler.sample()
            mp_val = mp_matrix(g_sample, ndarray.gpu(rank))
            feed_dict = {
                gcn1.mp : mp_val,
                gcn2.mp : mp_val,
                mask_ : ndarray.array(mask, ctx=ctx),
                x_ : ndarray.array(g_sample.x, ctx=ctx),
                y_ : ndarray.array(convert_to_one_hot(g_sample.y, max_val=g_sample.num_classes), ctx=ctx)
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = ((y_predicted == g_sample.y) * mask).sum()
            distributed.ps_get_worker_communicator().BarrierWorker()
            nnodes += batch_size
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", acc/mask.sum())
                start = time.time()
                if epoch >= num_epoch:
                    break
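
This function reads its worker rank from the WORKER_ID and DMLC_NUM_WORKER environment variables (set by the PS launcher) and its dataset layout from meta.yml. A hypothetical entry point, with argument names inferred from the attributes accessed above (args.path, args.hidden_size, args.num_epoch) and illustrative defaults:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True, help="directory containing meta.yml and the partitioned graph")
    parser.add_argument("--hidden_size", type=int, default=128)
    parser.add_argument("--num_epoch", type=int, default=10)
    args = parser.parse_args()
    train_hetu(args)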
Example No. 4
def cross_layer(x0, x1):
    # x0: input embedding feature (batch_size, 26 * embedding_size + 13)
    # x1: the output of last layer (batch_size, 26 * embedding_size + 13)

    embedding_len = 26 * 128 + 13
    weight = init.random_normal(shape=(embedding_len, 1),
                                stddev=0.01,
                                name='weight')
    bias = init.random_normal(shape=(embedding_len, ),
                              stddev=0.01,
                              name='bias')
    x1w = ad.matmul_op(x1, weight)  #(batch_size, 1)
    y = ad.mul_op(x0, ad.broadcastto_op(x1w, x0))
    y = y + x1 + ad.broadcastto_op(bias, y)
    return y
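
In a Deep & Cross Network this layer is typically stacked several times, always feeding the original embedding x0 back in together with the previous layer's output. A small stacking sketch (build_cross_layers is an illustrative helper, not from the original source); note that each cross_layer call creates its own weight and bias:

def build_cross_layers(x0, num_layers=3):
    # x0: (batch_size, 26 * embedding_size + 13) input embedding feature
    x = x0
    for _ in range(num_layers):
        x = cross_layer(x0, x)   # each layer mixes the original input with the previous output
    return x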
Example No. 5
def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)

    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")

    gcn1 = GraphSage(graph.num_features, hidden_layer_size, activation="relu", dropout=0.1)
    gcn2 = GraphSage(2*hidden_layer_size, hidden_layer_size, activation="relu", dropout=0.1)

    x = gcn1(x_)
    x = gcn2(x)
    W = initializers.xavier_uniform(shape=(2*hidden_layer_size, graph.num_classes))
    B = initializers.zeros(shape=(graph.num_classes,))
    x = ad.matmul_op(x, W)
    y = x + ad.broadcastto_op(B, x)

    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.mul_op(loss, mask_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)

    def eval():
        start = time.time()
        ad.Dropout.DropoutOp.phase = "eval"
        mp_val = mp_matrix(graph_full, ctx)

        feed_dict = {
            gcn1.mp : mp_val,
            gcn2.mp : mp_val,
            x_ : ndarray.array(graph_full.x, ctx=ctx),
        }
        executor_eval = ad.Executor([y], ctx=ctx)
        y_predicted, = executor_eval.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == graph_full.y)[train_split:].sum()
        print("Test accuracy:", acc/len(y_predicted[train_split:]))
        ad.Dropout.DropoutOp.phase = "training"
    epoch = 0
    nnodes = 0
    batch_size = 1000
    with GraphSageSampler(graph, batch_size, depth=2, num_sample_thread=4) as sampler:
        start = time.time()
        while True:
            g_sample, mask = sampler.sample()
            mp_val = mp_matrix(g_sample, ctx)
            #print(time.time() - start)
            feed_dict = {
                gcn1.mp : mp_val,
                gcn2.mp : mp_val,
                mask_ : ndarray.array(mask, ctx=ctx),
                x_ : ndarray.array(g_sample.x, ctx=ctx),
                y_ : ndarray.array(convert_to_one_hot(g_sample.y, max_val=graph.num_classes), ctx=ctx)
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = ((y_predicted == g_sample.y) * mask).sum()
            # print(i, "Train loss :", loss_val.asnumpy().mean())
            # print(i, "Train accuracy:", acc/len(y_predicted))
            nnodes += batch_size
            if nnodes > graph_full.num_nodes:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", acc/mask.sum())
                eval()
                start = time.time()
                if epoch >= num_epoch:
                    break
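
Unlike Example No. 3, this variant relies on module-level objects defined elsewhere in the script (graph, graph_full, hidden_layer_size, train_split, mp_matrix, convert_to_one_hot, GraphSageSampler). Given those, training is started with a single call; the epoch count below is illustrative:

if __name__ == "__main__":
    train_hetu(num_epoch=10)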
Example No. 6
File: LSTM.py  Project: sj1104/Het
def lstm(x, y_):
    '''
    LSTM model, for MNIST dataset.

    Parameters:
        x: Variable(hetu.gpu_ops.Node.Node), shape (N, dims), with dims = nsteps * diminput (784 for MNIST)
        y_: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    Return:
        loss: Variable(hetu.gpu_ops.Node.Node), shape (1,)
        y: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    '''

    print("Building LSTM model...")
    diminput = 28
    dimhidden = 128
    dimoutput = 10
    nsteps = 28

    forget_gate_w = init.random_normal(shape=(diminput, dimhidden),
                                       stddev=0.1,
                                       name="lstm_forget_gate_w")
    forget_gate_u = init.random_normal(shape=(dimhidden, dimhidden),
                                       stddev=0.1,
                                       name="lstm_forget_gate_u")
    forget_gate_b = init.random_normal(shape=(dimhidden, ),
                                       stddev=0.1,
                                       name="lstm_forget_gate_b")
    input_gate_w = init.random_normal(shape=(diminput, dimhidden),
                                      stddev=0.1,
                                      name="lstm_input_gate_w")
    input_gate_u = init.random_normal(shape=(dimhidden, dimhidden),
                                      stddev=0.1,
                                      name="lstm_input_gate_u")
    input_gate_b = init.random_normal(shape=(dimhidden, ),
                                      stddev=0.1,
                                      name="lstm_input_gate_b")
    output_gate_w = init.random_normal(shape=(diminput, dimhidden),
                                       stddev=0.1,
                                       name="lstm_output_gate_w")
    output_gate_u = init.random_normal(shape=(dimhidden, dimhidden),
                                       stddev=0.1,
                                       name="lstm_output_gate_u")
    output_gate_b = init.random_normal(shape=(dimhidden, ),
                                       stddev=0.1,
                                       name="lstm_output_gate_b")
    tanh_w = init.random_normal(shape=(diminput, dimhidden),
                                stddev=0.1,
                                name="lstm_tanh_w")
    tanh_u = init.random_normal(shape=(dimhidden, dimhidden),
                                stddev=0.1,
                                name="lstm_tanh_u")
    tanh_b = init.random_normal(shape=(dimhidden, ),
                                stddev=0.1,
                                name="lstm_tanh_b")
    out_weights = init.random_normal(shape=(dimhidden, dimoutput),
                                     stddev=0.1,
                                     name="lstm_out_weight")
    out_bias = init.random_normal(shape=(dimoutput, ),
                                  stddev=0.1,
                                  name="lstm_out_bias")
    initial_state = ad.Variable(value=np.zeros((1, )).astype(np.float32),
                                name='initial_state',
                                trainable=False)

    # unroll the LSTM for nsteps time steps; each step reads one (N, diminput) slice of x
    for i in range(nsteps):
        cur_x = ad.slice_op(x, (0, i * diminput), (-1, diminput))
        # forget gate
        if i == 0:
            temp = ad.matmul_op(cur_x, forget_gate_w)
            last_c_state = ad.broadcastto_op(initial_state, temp)
            last_h_state = ad.broadcastto_op(initial_state, temp)
            cur_forget = ad.matmul_op(last_h_state, forget_gate_u) + temp
        else:
            cur_forget = ad.matmul_op(last_h_state,
                                      forget_gate_u) + ad.matmul_op(
                                          cur_x, forget_gate_w)
        cur_forget = cur_forget + ad.broadcastto_op(forget_gate_b, cur_forget)
        cur_forget = ad.sigmoid_op(cur_forget)
        # input gate
        cur_input = ad.matmul_op(last_h_state, input_gate_u) + ad.matmul_op(
            cur_x, input_gate_w)
        cur_input = cur_input + ad.broadcastto_op(input_gate_b, cur_input)
        cur_input = ad.sigmoid_op(cur_input)
        # output gate
        cur_output = ad.matmul_op(last_h_state, output_gate_u) + ad.matmul_op(
            cur_x, output_gate_w)
        cur_output = cur_output + ad.broadcastto_op(output_gate_b, cur_output)
        cur_output = ad.sigmoid_op(cur_output)
        # tanh
        cur_tanh = ad.matmul_op(last_h_state, tanh_u) + ad.matmul_op(
            cur_x, tanh_w)
        cur_tanh = cur_tanh + ad.broadcastto_op(tanh_b, cur_tanh)
        cur_tanh = ad.tanh_op(cur_tanh)

        last_c_state = ad.mul_op(last_c_state, cur_forget) + ad.mul_op(
            cur_input, cur_tanh)
        last_h_state = ad.tanh_op(last_c_state) * cur_output

    x = ad.matmul_op(last_h_state, out_weights)
    y = x + ad.broadcastto_op(out_bias, x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.reduce_mean_op(loss, [0])
    return loss, y
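
A hypothetical training sketch for this model (not part of the original file), assuming MNIST images flattened to (N, 784) so that the 28 slices of width 28 in the loop above line up, and one-hot labels of shape (N, 10); the optimizer and executor wiring follows the other examples on this page.

import numpy as np

ctx = ndarray.gpu(0)
x_ = ad.Variable(name="x")
y_ = ad.Variable(name="y")
loss, y = lstm(x_, y_)
opt = optimizer.SGDOptimizer(learning_rate=0.01)
train_op = opt.minimize(loss)
executor = ad.Executor([loss, y, train_op], ctx=ctx)

images = np.random.rand(64, 28 * 28).astype(np.float32)              # stand-in for an MNIST batch
labels = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, 64)]  # one-hot labels
loss_val, y_val, _ = executor.run(feed_dict={
    x_: ndarray.array(images, ctx=ctx),
    y_: ndarray.array(labels, ctx=ctx),
})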