def forward(self, is_test=False):
        """
        Build the substructure-vs-context network and its loss.

        Two graph wrappers ("graph" and "context_graph") and four index
        placeholders are declared, ``self.substruct_model`` and
        ``self.context_model`` are run on their respective wrappers, and a
        sigmoid cross-entropy loss is built from two dot-product scores:
        substructure x context (target 1) and substructure x re-indexed
        context (target 0).

        Args:
            is_test: Forwarded unchanged to both models' ``forward`` calls.

        Nothing is returned; the method sets
        ``self.substruct_graph_wrapper``, ``self.context_graph_wrapper``
        and ``self.loss``.
        """

        def make_wrapper(wrapper_name):
            # Both wrappers declare the same atom/bond feature holders;
            # fresh spec lists are built on every call.
            return GraphWrapper(
                name=wrapper_name,
                node_feat=[('atom_type', [None, 1], "int64"),
                           ('chirality_tag', [None, 1], "int64")],
                edge_feat=[('bond_type', [None, 1], "int64"),
                           ('bond_direction', [None, 1], "int64")])

        def index_holder(holder_name):
            # int64 column placeholder used as a gather index.
            return layers.data(name=holder_name, shape=[-1, 1], dtype="int64")

        sub_gw = make_wrapper("graph")
        ctx_gw = make_wrapper("context_graph")

        center_idx = index_holder("substruct_center_idx")
        overlap_idx = index_holder("context_overlap_idx")
        overlap_lod = layers.data(name="context_overlap_lod",
                                  shape=[1, -1],
                                  dtype="int32")
        cycle_idx = index_holder("context_cycle_index")

        # Substructure branch: keep only the rows picked by the center index.
        sub_nodes = self.substruct_model.forward(sub_gw, is_test=is_test)
        sub_repr = layers.gather(sub_nodes, center_idx)

        # Context branch: gather the overlap rows, attach the fed LoD and
        # pool each sequence with the configured pooling type.
        ctx_nodes = self.context_model.forward(ctx_gw, is_test=is_test)
        overlap_repr = layers.gather(ctx_nodes, overlap_idx)
        overlap_seq = layers.lod_reset(overlap_repr, overlap_lod)
        ctx_repr = layers.sequence_pool(overlap_seq, self.context_pooling)
        # Contexts re-indexed by `context_cycle_index` act as the negatives.
        neg_ctx_repr = layers.gather(ctx_repr, cycle_idx)

        pos_logit = layers.reduce_sum(sub_repr * ctx_repr, 1)
        neg_logit = layers.reduce_sum(sub_repr * neg_ctx_repr, 1)

        # Constant targets are derived from the logits so that they follow
        # the logits' shape; gradients must not flow through them.
        ones = pos_logit * 0.0 + 1.0
        ones.stop_gradient = True
        zeros = neg_logit * 0.0
        zeros.stop_gradient = True

        pos_loss = layers.sigmoid_cross_entropy_with_logits(x=pos_logit,
                                                            label=ones)
        neg_loss = layers.sigmoid_cross_entropy_with_logits(x=neg_logit,
                                                            label=zeros)
        total = layers.reduce_mean(pos_loss + neg_loss)

        self.substruct_graph_wrapper = sub_gw
        self.context_graph_wrapper = ctx_gw
        self.loss = total
Пример #2
0
def create_model(args, config, graph_label):
    """Create the fine-tuning model for the given configuration.

    The graph embedding produced by ``GINEncoder`` is passed through two
    ReLU fully-connected layers and a final linear layer that emits one
    logit per task.

    Args:
        args: Object providing ``num_tasks`` (number of output logits).
        config: Configuration passed to ``GINEncoder``;
            ``config['hidden_size']`` is the width of both hidden layers.
        graph_label: Label variable given to the sigmoid cross-entropy loss.

    Returns:
        An ``Agent`` namedtuple with the fields ``loss``, ``graph_wrapper``,
        ``encoder``, ``graph_emb`` and ``pred``.
    """
    logging.info('building model')
    graph_wrapper = GraphWrapper(name="graph",
                                 node_feat=[('atom_type', [None, 1], "int64"),
                                            ('chirality_tag', [None,
                                                               1], "int64")],
                                 edge_feat=[('bond_type', [None, 1], "int64"),
                                            ('bond_direction', [None,
                                                                1], "int64")])

    encoder = GINEncoder(config)
    # Only the graph-level output is needed here; the second output is unused.
    global_repr, _ = encoder.forward(graph_wrapper)

    hid = L.fc(global_repr,
               config['hidden_size'],
               act='relu',
               name='finetune_fc1')
    hid = L.fc(hid, config['hidden_size'], act='relu', name='finetune_fc2')

    # The classifier must consume the MLP output. Feeding `global_repr` here
    # would leave finetune_fc1/finetune_fc2 disconnected from the loss.
    logits = L.fc(hid, args.num_tasks, name="finetune_fc3")
    loss = L.sigmoid_cross_entropy_with_logits(x=logits, label=graph_label)
    loss = L.reduce_mean(loss)
    pred = L.sigmoid(logits)

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb', 'pred']
    Agent = namedtuple('Agent', keys)
    return Agent(loss=loss,
                 graph_wrapper=graph_wrapper,
                 encoder=encoder,
                 graph_emb=global_repr,
                 pred=pred)
Пример #3
0
    def __init__(self,
                 model_config,
                 use_pretrained_compound_gnns=False):
        """Declare the input holders and create the compound/protein models.

        Args:
            model_config: Mapping whose ``'compound'`` and ``'protein'``
                entries are handed to the respective sub-models.
            use_pretrained_compound_gnns: When truthy, the compound model is
                a ``PretrainGNNModel`` named ``'gnn'``; otherwise it is a
                ``CompoundGNNModel`` named ``'compound'``.
        """
        self.model_config = model_config
        self.use_pretrained_compound_gnns = use_pretrained_compound_gnns

        # Compound input: categorical atom/bond features plus a float
        # feature whose width comes from CompoundConstants.
        numeric_dim = CompoundConstants.atomic_numeric_feat_dim
        atom_spec = [
            ('atom_type', [None, 1], "int64"),
            ('chirality_tag', [None, 1], "int64"),
            ('atom_numeric_feat', [None, numeric_dim], "float32"),
        ]
        bond_spec = [
            ('bond_type', [None, 1], "int64"),
            ('bond_direction', [None, 1], "int64"),
        ]
        self.compound_graph_wrapper = GraphWrapper(name="compound_graph",
                                                   node_feat=atom_spec,
                                                   edge_feat=bond_spec)

        # Protein input: token ids with their LoD supplied by a second holder.
        token_holder = fluid.layers.data(
            name='protein_token', shape=[None, 1], dtype='int64')
        lod_holder = fluid.layers.data(
            name='protein_token_lod', shape=[None], dtype='int32')
        self.protein_token = fluid.layers.lod_reset(token_holder, y=lod_holder)

        compound_cfg = model_config['compound']
        if not use_pretrained_compound_gnns:
            self.compound_model = CompoundGNNModel(compound_cfg,
                                                   name='compound')
        else:
            # TODO: update the name to 'compound'
            self.compound_model = PretrainGNNModel(compound_cfg, name='gnn')

        self.protein_model = ProteinSequenceModel(model_config['protein'],
                                                  name='protein')
Пример #4
0
    def forward(self, is_test=False):
        """
        Build the masked-node prediction network.

        Node representations from ``self.gnn_model`` are gathered at the fed
        ``masked_node_indice`` positions and classified by a fully-connected
        layer with ``len(CompoundConstants.atom_num_list)`` outputs. The
        loss is the mean softmax cross-entropy against ``masked_node_label``.

        Args:
            is_test: Forwarded unchanged to ``self.gnn_model.forward``.

        Nothing is returned; the method sets ``self.graph_wrapper``,
        ``self.loss`` and ``self.acc``.
        """
        graph_wrapper = GraphWrapper(name="graph",
                node_feat=[
                    ('atom_type', [None, 1], "int64"),
                    ('chirality_tag', [None, 1], "int64")],
                edge_feat=[
                    ('bond_type', [None, 1], "int64"),
                    ('bond_direction', [None, 1], "int64")])
        masked_node_indice = layers.data(name="masked_node_indice", shape=[-1, 1], dtype="int64")
        masked_node_label = layers.data(name="masked_node_label", shape=[-1, 1], dtype="int64")

        node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
        masked_node_repr = layers.gather(node_repr, masked_node_indice)
        logits = layers.fc(masked_node_repr,
                size=len(CompoundConstants.atom_num_list),
                name="masked_node_logits")

        loss, pred = layers.softmax_with_cross_entropy(
                logits, masked_node_label, return_softmax=True)
        loss = layers.reduce_mean(loss)
        acc = layers.accuracy(pred, masked_node_label)

        self.graph_wrapper = graph_wrapper
        self.loss = loss
        # Keep the accuracy variable reachable so callers can fetch it;
        # otherwise the op is built and its result is immediately lost.
        self.acc = acc
    def forward(self, is_test=False):
        """Build the supervised pre-training network and its weighted loss.

        Node representations from ``self.gnn_model`` are pooled per graph
        with ``self.pool_type`` and mapped to ``self.task_num`` logits. The
        element-wise sigmoid cross-entropy is multiplied by the fed
        ``valid`` tensor, summed, and divided by the sum of ``valid``.

        Args:
            is_test: Forwarded unchanged to ``self.gnn_model.forward``.

        Nothing is returned; the method sets ``self.graph_wrapper`` and
        ``self.loss``.
        """
        atom_spec = [
            ('atom_type', [None, 1], "int64"),
            ('chirality_tag', [None, 1], "int64"),
        ]
        bond_spec = [
            ('bond_type', [None, 1], "int64"),
            ('bond_direction', [None, 1], "int64"),
        ]
        gw = GraphWrapper(name="graph",
                          node_feat=atom_spec,
                          edge_feat=bond_spec)

        label_holder = layers.data(
            name="supervised_label",
            shape=[None, self.task_num],
            dtype="float32")
        # NOTE(review): `valid` presumably flags which labels are present;
        # confirm against the data feeder.
        valid_holder = layers.data(
            name="valid",
            shape=[None, self.task_num],
            dtype="float32")

        node_embedding = self.gnn_model.forward(gw, is_test=is_test)
        graph_embedding = pgl.layers.graph_pooling(gw, node_embedding,
                                                   self.pool_type)
        task_logits = layers.fc(graph_embedding,
                                size=self.task_num,
                                name="pretrain_supervised_fc")

        per_entry = layers.sigmoid_cross_entropy_with_logits(
            x=task_logits, label=label_holder)
        weighted_total = layers.reduce_sum(per_entry * valid_holder)
        weight_total = layers.reduce_sum(valid_holder)

        self.graph_wrapper = gw
        self.loss = weighted_total / weight_total
Пример #6
0
def create_model(args, config):
    """Create the local/global contrastive model for the given configuration.

    Args:
        args: Object providing ``batch_size``, used as the second dimension
            of the two mask holders and as the row count of the prior sample.
        config: Configuration passed to ``GINEncoder``; ``config['measure']``
            selects the expectation measure, ``config['prior']`` toggles the
            prior term and ``config['gamma']`` scales it.

    Returns:
        An ``Agent`` namedtuple with the fields ``loss``, ``graph_wrapper``,
        ``encoder`` and ``graph_emb``.
    """
    logging.info('building model')
    atom_spec = [('atom_type', [None, 1], "int64"),
                 ('chirality_tag', [None, 1], "int64")]
    bond_spec = [('bond_type', [None, 1], "int64"),
                 ('bond_direction', [None, 1], "int64")]
    gw = GraphWrapper(name="graph", node_feat=atom_spec, edge_feat=bond_spec)

    # NOTE: [num_nodes, num_graphs], bs = num_graphs
    pos_mask = L.data(
        name='pos_mask', shape=[-1, args.batch_size], dtype='float32')
    neg_mask = L.data(
        name='neg_mask', shape=[-1, args.batch_size], dtype='float32')

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(gw)

    # Separate feed-forward heads for the graph-level and node-level outputs.
    global_head = FF(encoder.embedding_dim)
    local_head = FF(encoder.embedding_dim)
    graph_enc = global_head.forward(global_repr)
    node_enc = local_head.forward(patch_summary)

    # Score matrix between every local encoding and every global encoding.
    scores = L.matmul(node_enc, graph_enc, transpose_y=True)

    pos_terms = get_positive_expectation(scores * pos_mask,
                                         config['measure'],
                                         average=False)
    pos_expect = L.reduce_sum(pos_terms) / gw.num_nodes

    neg_terms = get_negative_expectation(scores * neg_mask,
                                         config['measure'],
                                         average=False)
    neg_total = L.reduce_sum(neg_terms)
    neg_pairs = gw.num_nodes * (gw.num_graph - 1)
    neg_expect = neg_total / neg_pairs

    local_global_loss = neg_expect - pos_expect

    prior_loss = 0
    if config['prior']:
        discriminator = PriorDiscriminator(encoder.embedding_dim)
        prior_sample = L.uniform_random(
            [args.batch_size, encoder.embedding_dim], min=0.0, max=1.0)
        log_real = L.reduce_mean(L.log(discriminator.forward(prior_sample)))
        log_fake = L.reduce_mean(
            L.log(1.0 - discriminator.forward(global_repr)))
        prior_loss = -(log_real + log_fake) * config['gamma']

    Agent = namedtuple('Agent',
                       ['loss', 'graph_wrapper', 'encoder', 'graph_emb'])
    return Agent(loss=local_global_loss + prior_loss,
                 graph_wrapper=gw,
                 encoder=encoder,
                 graph_emb=global_repr)
Пример #7
0
 def __init__(self, args, graph_wrapper=None):
     """Store hyper-parameters, pick the graph wrapper and build the model.

     Args:
         args: Object providing ``hidden_size``, ``num_nodes`` and
             ``drop_rate``; it is also passed to ``build_model``.
         graph_wrapper: Wrapper to reuse. When ``None``, a new
             ``GraphWrapper`` named "graph" is created on ``F.CPUPlace()``
             with a single float32 node feature ``'feat'`` of width 128.
     """
     self.hidden_size = args.hidden_size
     self.num_nodes = args.num_nodes
     self.drop_rate = args.drop_rate

     if graph_wrapper is not None:
         self.graph_wrapper = graph_wrapper
     else:
         feat_spec = [('feat', [None, 128], "float32")]
         self.graph_wrapper = GraphWrapper(name="graph",
                                           place=F.CPUPlace(),
                                           node_feat=feat_spec)

     self.build_model(args)
Пример #8
0
    def forward(self, is_test=False):
        """
        Build the fine-tuning network, its weighted loss and its prediction.

        A ``GraphWrapper`` named "graph" declares the atom/bond feature
        holders. Node representations from ``self.gnn_model`` are pooled per
        graph with ``self.pool_type`` and mapped to ``self.num_tasks``
        logits. The element-wise sigmoid cross-entropy is multiplied by the
        fed ``valid`` tensor, summed, and divided by the sum of ``valid``.

        Args:
            is_test: Forwarded unchanged to ``self.gnn_model.forward``.

        Nothing is returned; the method sets ``self.graph_wrapper``,
        ``self.loss``, ``self.pred`` (sigmoid of the logits) and
        ``self.finetune_label``.
        """
        atom_spec = [
            ('atom_type', [None, 1], "int64"),
            ('chirality_tag', [None, 1], "int64"),
        ]
        bond_spec = [
            ('bond_type', [None, 1], "int64"),
            ('bond_direction', [None, 1], "int64"),
        ]
        gw = GraphWrapper(name="graph",
                          node_feat=atom_spec,
                          edge_feat=bond_spec)

        label_holder = layers.data(
            name="finetune_label",
            shape=[None, self.num_tasks],
            dtype="float32")
        # NOTE(review): `valid` presumably flags which labels are present;
        # confirm against the data feeder.
        valid_holder = layers.data(
            name="valid",
            shape=[None, self.num_tasks],
            dtype="float32")

        node_embedding = self.gnn_model.forward(gw, is_test=is_test)
        graph_embedding = pgl.layers.graph_pooling(gw, node_embedding,
                                                   self.pool_type)
        task_logits = layers.fc(graph_embedding,
                                size=self.num_tasks,
                                name="finetune_fc")

        per_entry = layers.sigmoid_cross_entropy_with_logits(
            x=task_logits, label=label_holder)
        weighted_total = layers.reduce_sum(per_entry * valid_holder)
        weight_total = layers.reduce_sum(valid_holder)
        masked_loss = weighted_total / weight_total
        probabilities = layers.sigmoid(task_logits)

        self.graph_wrapper = gw
        self.loss = masked_loss
        self.pred = probabilities
        self.finetune_label = label_holder
Пример #9
0
    def __init__(self, args, dataset):
        """Store hyper-parameters, declare the input holders and build the model.

        Args:
            args: Object providing ``hidden_size``, ``num_classes``,
                ``num_features``, ``pooling_ratio``, ``dropout_ratio``,
                ``batch_size`` and ``dataset_name``.
            dataset: Indexable collection of ``(graph, label)`` pairs; the
                entries at index 0 and 1 are read here.
        """
        self.args = args
        self.dataset = dataset
        self.hidden_size = args.hidden_size
        self.num_classes = args.num_classes
        self.num_features = args.num_features
        self.pooling_ratio = args.pooling_ratio
        self.dropout_ratio = args.dropout_ratio
        self.batch_size = args.batch_size

        # Batch the first two samples; the batch is used below only to
        # obtain the node-feature description for the graph wrapper.
        sample_graphs = []
        for idx in (0, 1):
            graph, _ = self.dataset[idx]
            sample_graphs.append(graph)
        sample_batch = MultiGraph(sample_graphs)

        # Attach a "norm" node feature: indegree ** -0.5 where the indegree
        # is positive, 0 elsewhere.
        indegree = sample_batch.indegree()
        has_incoming = indegree > 0
        norm = np.zeros_like(indegree, dtype="float32")
        norm[has_incoming] = np.power(indegree[has_incoming], -0.5)
        sample_batch.node_feat["norm"] = np.expand_dims(norm, -1)

        self.graph_wrapper = GraphWrapper(
            name="graph", node_feat=sample_batch.node_feat_info())

        self.labels = L.data(
            "labels",
            shape=[None, self.args.num_classes],
            dtype="int32",
            append_batch_size=False)
        self.labels_1dim = L.data(
            "labels_1dim",
            shape=[None],
            dtype="int32",
            append_batch_size=False)
        self.graph_id = L.data(
            "graph_id",
            shape=[None],
            dtype="int32",
            append_batch_size=False)

        # The FRANKENSTEIN dataset uses `gcn`; every other dataset uses
        # `norm_gcn`.
        self.gcn = gcn if self.args.dataset_name == "FRANKENSTEIN" else norm_gcn

        self.build_model()
Пример #10
0
    def __init__(self, name, edge_types, node_feat=None, edge_feat=None, **kwargs):
        """Create one ``GraphWrapper`` per edge type.

        Args:
            name: Prefix for the data names; each wrapper is named
                ``name + '/' + edge_type``.
            edge_types: Iterable of edge-type names (strings).
            node_feat: Node feature description passed as-is to every
                wrapper. When omitted or empty, an empty dict is passed.
            edge_feat: Mapping from edge type to that type's edge feature
                description. When omitted or empty, every wrapper receives
                an empty dict.
            **kwargs: Accepted for compatibility; not used by this method.

        Raises:
            KeyError: If ``edge_feat`` is a non-empty dict that has no entry
                for one of ``edge_types``.
        """
        # `None` defaults replace the former `{}` defaults so that no mutable
        # object is shared between calls; falsy values still map to `{}`.
        self.__data_name_prefix = name
        self._edge_types = edge_types
        self._multi_gw = {}
        for edge_type in self._edge_types:
            type_name = self.__data_name_prefix + '/' + edge_type
            n_feat = node_feat if node_feat else {}
            e_feat = edge_feat[edge_type] if edge_feat else {}

            self._multi_gw[edge_type] = GraphWrapper(
                name=type_name,
                node_feat=n_feat,
                edge_feat=e_feat)
Пример #11
0
    def __init__(self, args, dataset):
        """Store hyper-parameters, declare the input holders and build the model.

        Args:
            args: Object providing ``hidden_size``, ``embed_dim``,
                ``dropout_rate``, ``pool_type`` and ``num_class``.
            dataset: Indexable collection of ``(graph, label)`` pairs; the
                entries at index 0 and 1 are read here.
        """
        self.args = args
        self.dataset = dataset
        self.hidden_size = self.args.hidden_size
        self.embed_dim = self.args.embed_dim
        self.dropout_prob = self.args.dropout_rate
        self.pool_type = self.args.pool_type
        self._init_vars = []

        # Batch the first two samples; the batch is used below only to
        # obtain the node/edge feature descriptions for the graph wrapper.
        sample_graphs = []
        for idx in (0, 1):
            graph, _ = self.dataset[idx]
            sample_graphs.append(graph)
        sample_batch = pgl.graph.MultiGraph(sample_graphs)

        # Cast both "feat" arrays to int64 before reading the feature info.
        sample_batch.edge_feat["feat"] = \
            sample_batch.edge_feat["feat"].astype("int64")
        sample_batch.node_feat["feat"] = \
            sample_batch.node_feat["feat"].astype("int64")

        self.graph_wrapper = GraphWrapper(
            name="graph",
            place=F.CPUPlace(),
            node_feat=sample_batch.node_feat_info(),
            edge_feat=sample_batch.edge_feat_info())

        self.atom_encoder = AtomEncoder(name="atom", emb_dim=self.embed_dim)
        self.bond_encoder = BondEncoder(name="bond", emb_dim=self.embed_dim)

        self.labels = L.data(
            "labels",
            shape=[None, self.args.num_class],
            dtype="float32",
            append_batch_size=False)
        self.unmask = L.data(
            "unmask",
            shape=[None, self.args.num_class],
            dtype="float32",
            append_batch_size=False)

        self.build_model()
Пример #12
0
    def forward(self, is_test=False):
        """
        Build the fine-tuning network and return its inputs and logits.

        A ``GraphWrapper`` named "graph" declares the atom/bond feature
        holders. Node representations from ``self.gnn_model`` are pooled per
        graph with ``self.pool_type`` and mapped to ``self.num_tasks``
        logits by a fully-connected layer named "finetune_fc".

        Args:
            is_test: Forwarded unchanged to ``self.gnn_model.forward``.

        Returns:
            A ``(graph_wrapper, logits)`` tuple: the wrapper created here and
            the output of the final fully-connected layer.
        """
        atom_spec = [
            ('atom_type', [None, 1], "int64"),
            ('chirality_tag', [None, 1], "int64"),
        ]
        bond_spec = [
            ('bond_type', [None, 1], "int64"),
            ('bond_direction', [None, 1], "int64"),
        ]
        gw = GraphWrapper(name="graph",
                          node_feat=atom_spec,
                          edge_feat=bond_spec)

        node_embedding = self.gnn_model.forward(gw, is_test=is_test)
        graph_embedding = pgl.layers.graph_pooling(gw, node_embedding,
                                                   self.pool_type)
        task_logits = layers.fc(graph_embedding,
                                size=self.num_tasks,
                                name="finetune_fc")
        return gw, task_logits
Пример #13
0
    def __init__(self, args, task):
        """Validate the task, declare the graph inputs and build the model.

        Args:
            args: Object providing ``norm_score``, ``ernie_config_path`` and
                ``max_seq_len``; it is also passed to ``build_model``.
            task: One of "predict_query", "predict_poi", "pointwise",
                "pairwise", "listwise" or "listwise_hinge".

        Raises:
            ValueError: If ``task`` is not one of the supported tasks.
        """
        candi_tasks = [
            "predict_query", "predict_poi", "pointwise", "pairwise",
            "listwise", "listwise_hinge"
        ]
        if task not in candi_tasks:
            raise ValueError("task %s not in %s" % (task, candi_tasks))

        self.norm_score = args.norm_score
        self.ernie_config = ErnieConfig(args.ernie_config_path)
        self.ernie_config.print_config()

        self.city_size = 20000
        self.hidden_size = 64

        self._holder_list = []

        # Per-node inputs: four sequence features of length max_seq_len
        # plus a node-type id.
        seq_len = args.max_seq_len
        node_feature = [
            ('src_ids', [None, seq_len], "int64"),
            ('pos_ids', [None, seq_len], "int64"),
            ('sent_ids', [None, seq_len], "int64"),
            ('input_mask', [None, seq_len], "float32"),
            ('node_types', [None], "int32"),
        ]

        if task == "predict_query":
            # This is for save_inference_mode for query
            self.graph_wrapper = FakeGraphWrapper(node_feat=node_feature)
        else:
            self.graph_wrapper = GraphWrapper(name="graph",
                                              place=F.CPUPlace(),
                                              node_feat=node_feature)
        # Either wrapper kind exposes its holders through `holder_list`.
        self._holder_list.extend(self.graph_wrapper.holder_list)

        self.build_model(args, task)