Example #1
def make_all_py_reader_inputs(input_fields, is_test=False):
    # Build a py_reader whose shapes, dtypes and LoD levels are looked up in
    # the module-level `input_descs` table; entries without an explicit LoD
    # level default to 0 (dense tensors).
    reader = layers.py_reader(
        capacity=20,
        name="test_reader" if is_test else "train_reader",
        shapes=[input_descs[input_field][0] for input_field in input_fields],
        dtypes=[input_descs[input_field][1] for input_field in input_fields],
        lod_levels=[
            input_descs[input_field][2]
            if len(input_descs[input_field]) == 3 else 0
            for input_field in input_fields
        ])
    # Return the unpacked data layers together with the reader itself.
    return layers.read_file(reader), reader
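Both variants assume a module-level `input_descs` table (and `layers` aliasing `paddle.fluid.layers`): each field name maps to a list of shape, dtype and, optionally, a LoD level, which is exactly what the `[0]`, `[1]`, `[2]` indexing above pulls apart. A minimal, hypothetical sketch of such a table and a call; the field names and shapes are illustrative, not the Transformer model's actual descriptors:

input_descs = {
    # field name: [shape, dtype, (optional) lod_level]
    "src_word": [[-1, 128, 1], "int64"],
    "src_pos": [[-1, 128, 1], "int64"],
    "lbl_word": [[-1, 1], "int64", 2],  # entry with an explicit LoD level
}

data_inputs, reader = make_all_py_reader_inputs(
    ["src_word", "src_pos", "lbl_word"])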
Example #2
def make_all_py_reader_inputs(input_fields, is_test=False):
    """
    Define the input data layers for the transformer model.
    """
    reader = layers.py_reader(
        capacity=20,
        name="test_reader" if is_test else "train_reader",
        shapes=[input_descs[input_field][0] for input_field in input_fields],
        dtypes=[input_descs[input_field][1] for input_field in input_fields],
        lod_levels=[
            input_descs[input_field][2]
            if len(input_descs[input_field]) == 3 else 0
            for input_field in input_fields
        ],
        # Double buffering prefetches the next batch, hiding copy latency.
        use_double_buffer=True)
    return layers.read_file(reader), reader
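Either variant returns the reader undecorated: before the program can run, the reader has to be bound to a data source and started, and each Executor.run call then consumes one batch until the source is exhausted. A minimal driver sketch, assuming `import paddle` and `import paddle.fluid as fluid`, with `my_sample_generator` standing in for the real data pipeline:

data_inputs, reader = make_all_py_reader_inputs(input_fields)
reader.decorate_paddle_reader(
    paddle.batch(my_sample_generator, batch_size=32))

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

reader.start()
try:
    while True:
        exe.run()  # consumes one batch per call
except fluid.core.EOFException:
    reader.reset()  # rewind the reader for the next epoch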
Example #3
def node2vec_model(graph, hidden_size=16, neg_num=5):
    # Three int64 slots per sample: source node, positive context node, and
    # neg_num negative samples.
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1, 1], [-1, 1, 1], [-1, neg_num, 1]],
        dtypes=['int64', 'int64', 'int64'],
        lod_levels=[0, 0, 0],
        name='train',
        use_double_buffer=True)

    embed_init = fluid.initializer.UniformInitializer(low=-1.0, high=1.0)
    weight_init = fluid.initializer.TruncatedNormal(scale=1.0 /
                                                    math.sqrt(hidden_size))

    src, pos, negs = l.read_file(pyreader)

    embed_src = l.embedding(
        input=src,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(
            name='content', initializer=embed_init))

    # Positive and negative context embeddings share a single parameter
    # table: both ParamAttrs use the name 'weight'.
    weight_pos = l.embedding(
        input=pos,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(
            name='weight', initializer=weight_init))
    weight_negs = l.embedding(
        input=negs,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(
            name='weight', initializer=weight_init))

    pos_logits = l.matmul(
        embed_src, weight_pos, transpose_y=True)  # [batch_size, 1, 1]
    neg_logits = l.matmul(
        embed_src, weight_negs, transpose_y=True)  # [batch_size, 1, neg_num]

    # Constant label tensors built in-graph so they match the logits' shape:
    # ones for positive pairs, zeros for negative samples.
    ones_label = pos_logits * 0. + 1.
    ones_label.stop_gradient = True
    pos_loss = l.sigmoid_cross_entropy_with_logits(pos_logits, ones_label)

    zeros_label = neg_logits * 0.
    zeros_label.stop_gradient = True
    neg_loss = l.sigmoid_cross_entropy_with_logits(neg_logits, zeros_label)
    # Equal-weight average of the positive and negative terms.
    loss = (l.reduce_mean(pos_loss) + l.reduce_mean(neg_loss)) / 2

    return pyreader, loss
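The reader above declares three int64 slots shaped [-1, 1, 1], [-1, 1, 1] and [-1, neg_num, 1]. A hypothetical batch provider wired to it via `decorate_tensor_provider`, using random node ids in place of real random-walk samples (assuming `import paddle.fluid as fluid` and `l` aliasing `fluid.layers`):

import numpy as np

def random_batches(num_batches=100, batch_size=512, neg_num=5):
    # Placeholder sampler; a real pipeline would draw (src, pos) pairs from
    # random walks and negatives from a degree-based distribution.
    def reader():
        for _ in range(num_batches):
            src = np.random.randint(
                0, graph.num_nodes, [batch_size, 1, 1]).astype('int64')
            pos = np.random.randint(
                0, graph.num_nodes, [batch_size, 1, 1]).astype('int64')
            negs = np.random.randint(
                0, graph.num_nodes, [batch_size, neg_num, 1]).astype('int64')
            yield src, pos, negs
    return reader

pyreader, loss = node2vec_model(graph, hidden_size=16, neg_num=5)
pyreader.decorate_tensor_provider(random_batches(neg_num=5))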
Example #4
def node_classify_model(graph,
                        num_labels,
                        hidden_size=16,
                        name='node_classify_task'):
    pyreader = l.py_reader(capacity=70,
                           shapes=[[-1, 1], [-1, num_labels]],
                           dtypes=['int64', 'float32'],
                           lod_levels=[0, 0],
                           name=name + '_pyreader',
                           use_double_buffer=True)
    nodes, labels = l.read_file(pyreader)
    # Look up pretrained node embeddings (the parameter 'content' created by
    # the node2vec model above) and freeze them for this task.
    embed_nodes = l.embedding(input=nodes,
                              size=[graph.num_nodes, hidden_size],
                              param_attr=fluid.ParamAttr(name='content'))
    embed_nodes.stop_gradient = True
    logits = l.fc(input=embed_nodes, size=num_labels)
    # Multi-label objective: independent sigmoid cross entropy per class.
    loss = l.sigmoid_cross_entropy_with_logits(logits, labels)
    loss = l.reduce_mean(loss)
    prob = l.sigmoid(logits)
    # Per-node count of ground-truth labels, used for top-k evaluation.
    topk = l.reduce_sum(labels, -1)
    return pyreader, loss, prob, labels, topk
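`topk` holds each node's ground-truth label count, which suggests the usual multi-label evaluation: predict the k highest-probability classes for a node with k true labels. A hypothetical numpy post-processing step on fetched `prob` and `topk` values:

import numpy as np

def topk_predictions(prob_val, topk_val):
    # Mark the k most probable classes of each row as predicted positives.
    preds = np.zeros_like(prob_val)
    for i, k in enumerate(topk_val.reshape(-1).astype('int64')):
        preds[i, np.argsort(-prob_val[i])[:k]] = 1.0
    return preds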
Example #5
def node_classify_model(graph,
                        num_labels,
                        embed_dim=16,
                        name='node_classify_task'):
    """Build node classify model.

    Args:
        graph: The :code:`Graph` data object.

        num_labels: The number of labels.

        embed_dim: The dimension of embedding.

        name: The name of the model.
    """
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1], [-1, num_labels]],
        dtypes=['int64', 'float32'],
        lod_levels=[0, 0],
        name=name + '_pyreader',
        use_double_buffer=True)
    nodes, labels = l.read_file(pyreader)
    # Reuse the embedding table named 'shared_w' (trained elsewhere in the
    # same program) and freeze it for this task.
    embed_nodes = l.embedding(
        input=nodes, size=[graph.num_nodes, embed_dim], param_attr='shared_w')
    embed_nodes.stop_gradient = True
    logits = l.fc(input=embed_nodes, size=num_labels)
    loss = l.sigmoid_cross_entropy_with_logits(logits, labels)
    loss = l.reduce_mean(loss)
    prob = l.sigmoid(logits)
    # Per-node count of ground-truth labels, used for top-k evaluation.
    topk = l.reduce_sum(labels, -1)
    return {
        'pyreader': pyreader,
        'loss': loss,
        'prob': prob,
        'labels': labels,
        'topk': topk
    }
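Returning a dict keeps the call site stable as the number of outputs grows. A hypothetical call site; note that with `embed_nodes.stop_gradient = True` only the `fc` layer is trained, while the 'shared_w' table is assumed to come from a companion embedding model in the same program:

model = node_classify_model(graph, num_labels=40, embed_dim=16)
fluid.optimizer.Adam(learning_rate=1e-3).minimize(model['loss'])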
Example #6
    def forward(self):
        src, dsts = L.read_file(self.pyreader)

        if self.is_sparse:
            # Sparse embedding lookup expects 2-D index tensors.
            src = L.reshape(src, [-1, 1])
            dsts = L.reshape(dsts, [-1, 1])

        # Shard the embedding table manually across num_part pieces, except in
        # distributed mode where the framework handles the split itself.
        if self.num_part is not None and self.num_part != 1 and not self.is_distributed:
            src_embed = distributed_embedding(src,
                                              self.num_nodes,
                                              self.embed_dim,
                                              self.embed_init,
                                              "weight",
                                              self.num_part,
                                              self.is_sparse,
                                              learning_rate=self.embedding_lr)

            dsts_embed = distributed_embedding(dsts,
                                               self.num_nodes,
                                               self.embed_dim,
                                               self.embed_init,
                                               "weight",
                                               self.num_part,
                                               self.is_sparse,
                                               learning_rate=self.embedding_lr)
        else:
            src_embed = L.embedding(src, (self.num_nodes, self.embed_dim),
                                    self.is_sparse,
                                    self.is_distributed,
                                    param_attr=F.ParamAttr(
                                        name="weight",
                                        learning_rate=self.embedding_lr,
                                        initializer=self.embed_init))

            dsts_embed = L.embedding(dsts, (self.num_nodes, self.embed_dim),
                                     self.is_sparse,
                                     self.is_distributed,
                                     param_attr=F.ParamAttr(
                                         name="weight",
                                         learning_rate=self.embedding_lr,
                                         initializer=self.embed_init))

        if self.is_sparse:
            # Restore the [batch, slot, dim] layout flattened for the lookup.
            src_embed = L.reshape(src_embed, [-1, 1, self.embed_dim])
            dsts_embed = L.reshape(dsts_embed,
                                   [-1, self.neg_num + 1, self.embed_dim])

        logits = L.matmul(src_embed, dsts_embed,
                          transpose_y=True)  # [batch_size, 1, neg_num+1]

        # In-graph constant labels: 1 for the positive column, 0 for each of
        # the neg_num negative columns.
        pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                    "float32", 1)
        neg_label = L.fill_constant_batch_size_like(logits,
                                                    [-1, 1, self.neg_num],
                                                    "float32", 0)
        label = L.concat([pos_label, neg_label], -1)

        # Weight the single positive term by neg_num so that positives and
        # negatives contribute equally after averaging.
        pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                     "float32", self.neg_num)
        neg_weight = L.fill_constant_batch_size_like(logits,
                                                     [-1, 1, self.neg_num],
                                                     "float32", 1)
        weight = L.concat([pos_weight, neg_weight], -1)

        weight.stop_gradient = True
        label.stop_gradient = True

        loss = L.sigmoid_cross_entropy_with_logits(logits, label)
        loss = loss * weight
        loss = L.reduce_mean(loss)
        # Rescale so the weighted mean equals the balanced average of the
        # positive and negative losses (see the check after this example).
        loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
        # Keep the variable alive across iterations so it can be fetched.
        loss.persistable = True
        self.loss = loss
        return loss
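The final rescaling is what turns the weighted mean into a balanced average: the mean over the neg_num + 1 weighted terms is (neg_num·pos + Σneg) / (neg_num + 1), and multiplying by (neg_num + 1) / (2·neg_num) yields pos/2 + mean(neg)/2, the same form Example #3 writes out explicitly. A quick numpy check of that identity:

import numpy as np

neg_num = 5
pos = np.random.rand()           # loss on the positive pair
neg = np.random.rand(neg_num)    # losses on the negative samples

weighted_mean = np.concatenate([[pos * neg_num], neg]).mean()
rescaled = weighted_mean * ((neg_num + 1) / 2 / neg_num)
assert np.isclose(rescaled, (pos + neg.mean()) / 2)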
Example #7
    def forward(self):
        """ forward
        """
        src, dst = L.read_file(self.pyreader)

        # Slice out the raw node-id column; the huge end index (2^30 - 1)
        # simply means "to the end" along the batch axis.
        src_id = L.slice(src, [0, 1, 2, 3], [0, 0, 0, 0],
                         [int(math.pow(2, 30)) - 1, 1, 1, 1])
        dst_id = L.slice(dst, [0, 1, 2, 3], [0, 0, 0, 0],
                         [int(math.pow(2, 30)) - 1, self.neg_num + 1, 1, 1])

        if self.is_sparse:
            # Sparse embedding lookup expects 2-D index tensors.
            src = L.reshape(src, [-1, 1])
            dst = L.reshape(dst, [-1, 1])

        # [b, 1, f, h]
        src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        # [b, n+1, f, h]
        dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        if self.is_sparse:
            src_embed = L.reshape(src_embed,
                                  [-1, 1, self.num_featuers, self.hidden_size])
            dst_embed = L.reshape(
                dst_embed,
                [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

        # Per-node feature weights from the shared 'alpha' table,
        # softmax-normalized over the feature axis.
        # [b, 1, 1, f]
        src_weight = L.softmax(
            L.embedding(src_id, [self.num_nodes, self.num_featuers],
                        param_attr=F.ParamAttr(name="alpha")))
        # [b, n+1, 1, f]
        dst_weight = L.softmax(
            L.embedding(dst_id, [self.num_nodes, self.num_featuers],
                        param_attr=F.ParamAttr(name="alpha")))

        # The weighted sum over the feature axis collapses the per-feature
        # embeddings into one vector per node.
        # [b, 1, h]
        src_sum = L.squeeze(L.matmul(src_weight, src_embed), axes=[2])
        # [b, n+1, h]
        dst_sum = L.squeeze(L.matmul(dst_weight, dst_embed), axes=[2])

        logits = L.matmul(src_sum, dst_sum,
                          transpose_y=True)  # [batch_size, 1, neg_num+1]

        # Same in-graph label/weight construction as in Example #6.
        pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                    "float32", 1)
        neg_label = L.fill_constant_batch_size_like(logits,
                                                    [-1, 1, self.neg_num],
                                                    "float32", 0)
        label = L.concat([pos_label, neg_label], -1)

        pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                     "float32", self.neg_num)
        neg_weight = L.fill_constant_batch_size_like(logits,
                                                     [-1, 1, self.neg_num],
                                                     "float32", 1)
        weight = L.concat([pos_weight, neg_weight], -1)

        weight.stop_gradient = True
        label.stop_gradient = True

        loss = L.sigmoid_cross_entropy_with_logits(logits, label)
        loss = loss * weight
        loss = L.reduce_mean(loss)
        loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
        loss.persistable = True
        self.loss = loss
        return loss
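The softmax over the 'alpha' table yields per-node feature weights, and the matmul collapses the feature axis into a single embedding per node. A numpy rendering of the shape bookkeeping for the source side (b = batch, f = features, h = hidden; values are random stand-ins):

import numpy as np

b, f, h = 4, 3, 16
src_weight = np.random.rand(b, 1, 1, f)     # softmaxed feature weights
src_embed = np.random.rand(b, 1, f, h)      # per-feature embeddings

src_sum = np.matmul(src_weight, src_embed)  # [b, 1, 1, h]
src_sum = np.squeeze(src_sum, axis=2)       # [b, 1, h], as L.squeeze(axes=[2])
assert src_sum.shape == (b, 1, h)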