# Example 1
    def _build(self):
        """Build the unsupervised GraphSAGE forward graph.

        Samples multi-hop neighborhoods for the positive pair batches and the
        negative batch, aggregates them into node embeddings, creates the
        edge-prediction layer used by the loss, and L2-normalizes all outputs.
        """
        # Fan-out (number of sampled neighbors) per layer.
        num_samples = [layer_info.num_samples for layer_info in self.layer_infos]

        # Positive example pair embeddings (self.outputs1 / self.outputs2).
        self.samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)  # sampled node ids per hop
        self.samples2, support_sizes2 = self.sample(self.inputs2, self.layer_infos)
        # The aggregators built for inputs1 are reused for inputs2 and the
        # negatives so all branches share the same parameters.
        self.outputs1, self.aggregators = self.aggregate(self.samples1, [self.features], self.dims, num_samples,
                                                         support_sizes1, concat=self.concat, model_size=self.model_size)
        self.outputs2, _ = self.aggregate(self.samples2, [self.features], self.dims, num_samples,
                                          support_sizes2, aggregators=self.aggregators, concat=self.concat,
                                          model_size=self.model_size)

        # Negative example embeddings (their batch size is self.number).
        self.neg_sample, neg_support_sizes = self.sample(self.neg_samples, self.layer_infos,
                                                         self.number)
        self.neg_outputs, _ = self.aggregate(self.neg_sample, [self.features], self.dims, num_samples,
                                                 neg_support_sizes, batch_size=self.number,
                                                 aggregators=self.aggregators,
                                                 concat=self.concat, model_size=self.model_size)

        # Concatenating self- and neighbor-features doubles the output dim.
        dim_mult = 2 if self.concat else 1

        # Layer that scores node pairs for the unsupervised (edge) loss.
        self.link_pred_layer = BipartiteEdgePredLayer(dim_mult * self.dims[-1],
                                                      dim_mult * self.dims[-1], self.placeholders, act=tf.nn.sigmoid,
                                                      bilinear_weights=False,
                                                      name='edge_predict')

        # Register aggregator weights in the "params" collection
        # (presumably consumed for regularization/clipping elsewhere).
        for aggregator in self.aggregators:
            for var in aggregator.vars.values():
                tf.add_to_collection("params",var)

        # Row-wise L2 normalization of all output embeddings.
        self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)
        self.outputs2 = tf.nn.l2_normalize(self.outputs2, 1)
        self.neg_outputs = tf.nn.l2_normalize(self.neg_outputs, 1)
# Example 2
 def _build(self):
     """Look up the pair/negative embeddings and biases, then create the
     edge-prediction layer used by the loss."""
     lookup = tf.nn.embedding_lookup
     # Center-node (target) embeddings for the first element of each pair.
     self.outputs1 = lookup(self.target_embeds, self.inputs1)
     # Context embeddings and bias for the positive (true) neighbors.
     self.outputs2 = lookup(self.context_weights, self.inputs2)
     self.true_b = lookup(self.context_bias, self.inputs2)
     # Context embeddings and bias for the sampled negatives.
     self.neg_outputs = lookup(self.context_weights, self.neg_samples)
     self.neg_b = lookup(self.context_bias, self.neg_samples)

     self.link_pred_layer = BipartiteEdgePredLayer(
         self.embedding_dim, self.embedding_dim, self.placeholders,
         bilinear_weights=False)
# Example 3
    def build(self):
        """ Wrapper for _build(): assembles the layer stack, the per-batch
        embeddings, the edge-prediction layer, metrics, and the train op. """
        with tf.variable_scope(self.name):
            self._build()

        # Build sequential layer model: feed the inputs through each layer,
        # keeping every intermediate activation.
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.embedding = self.activations[-1]  # embedding matrix
        # Row-wise L2 normalization of the embedding matrix.
        self.embedding = tf.nn.l2_normalize(self.embedding, 1)

        # Per-batch embeddings for the positive pairs and the negatives.
        self.outputs1 = tf.nn.embedding_lookup(self.embedding, self.inputs1)
        self.outputs2 = tf.nn.embedding_lookup(self.embedding, self.inputs2)
        self.neg_outputs = tf.nn.embedding_lookup(self.embedding,
                                                  self.neg_samples)

        # Layer that scores node pairs for the unsupervised (edge) loss.
        self.link_pred_layer = BipartiteEdgePredLayer(self.output_dim,
                                                      self.output_dim,
                                                      self.placeholders,
                                                      act=tf.nn.sigmoid,
                                                      bilinear_weights=False,
                                                      name='edge_predict')

        # Store model variables for easy access, keyed by variable name.
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope=self.name)
        self.vars = {var.name: var for var in variables}

        # Build metrics and the training op.
        self._loss()
        self._accuracy()
        self.opt_op = self.optimizer.minimize(self.loss)
        # Predicted link probabilities for the positive pairs.
        self.p_probs = self.link_pred_layer.get_probs(self.outputs1,
                                                      self.outputs2)
# Example 4
class Deepwalk(object):
    """Simple version of the Node2Vec/DeepWalk skip-gram model.

    Follows the TensorFlow word2vec tutorial: a target embedding table, a
    context embedding table, and a context bias, trained with sigmoid
    cross-entropy over positive node pairs and sampled negatives.
    """

    def __init__(self,
                 placeholders,
                 dict_size,
                 name=None,
                 embedding_dim=50,
                 lr=0.001,
                 **kwargs):
        """ Simple version of Node2Vec/DeepWalk algorithm.

        Args:
            placeholders: dict of TF placeholders ("batch1", "batch2",
                "batch3", "batch4", 'batch_size').
            dict_size: the total number of nodes.
            name: variable-name prefix; defaults to the lowercased class name.
            embedding_dim: dimension of the vector representation of a node.
            lr: learning rate of the Adam optimizer.
        """

        super(Deepwalk, self).__init__(**kwargs)

        allowed_kwargs = {'name', 'logging', 'model_size'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        # BUG FIX: the original re-read `name` from kwargs, which can never
        # contain it (it is captured by the explicit parameter above), so a
        # caller-supplied name was silently ignored.
        if not name:
            name = self.__class__.__name__.lower()
        self.name = name

        self.logging = kwargs.get('logging', False)

        self.vars = {}

        # NOTE(review): `margin` is unused in the visible code — confirm
        # before removing.
        self.margin = 0.1

        self.placeholders = placeholders
        self.dict_size = dict_size
        self.embedding_dim = embedding_dim
        self.inputs1 = placeholders["batch1"]
        self.inputs2 = placeholders["batch2"]
        self.neg_samples = placeholders["batch3"]
        self.number = placeholders["batch4"]
        self.batch_size = placeholders['batch_size']

        # Model metrics (filled in by _loss/_accuracy).
        self.loss = 0
        self.accuracy = 0

        # Embedding tables, as in the tensorflow word2vec tutorial.
        self.target_embeds = tf.Variable(tf.random_uniform(
            [self.dict_size, self.embedding_dim], -1.0, 1.0),
                                         name="target_embeds")
        self.context_weights = tf.Variable(tf.truncated_normal(
            [self.dict_size, self.embedding_dim],
            stddev=1.0 / math.sqrt(self.embedding_dim)),
                                           name="context_embeds")
        self.context_bias = tf.Variable(tf.zeros([self.dict_size]),
                                        name="context_bias")

        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        self.build()

    def _build(self):
        """Look up pair/negative embeddings and biases, create the
        edge-prediction layer, and L2-normalize the embeddings."""
        self.outputs1 = tf.nn.embedding_lookup(self.target_embeds,
                                               self.inputs1)
        self.outputs2 = tf.nn.embedding_lookup(self.context_weights,
                                               self.inputs2)
        self.true_b = tf.nn.embedding_lookup(self.context_bias, self.inputs2)
        self.neg_outputs = tf.nn.embedding_lookup(self.context_weights,
                                                  self.neg_samples)
        self.neg_b = tf.nn.embedding_lookup(self.context_bias,
                                            self.neg_samples)

        self.link_pred_layer = BipartiteEdgePredLayer(self.embedding_dim,
                                                      self.embedding_dim,
                                                      self.placeholders,
                                                      bilinear_weights=False)

        # Row-wise L2 normalization of all output embeddings.
        self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)
        self.outputs2 = tf.nn.l2_normalize(self.outputs2, 1)
        self.neg_outputs = tf.nn.l2_normalize(self.neg_outputs, 1)

    def build(self):
        """Assemble the TF graph: forward pass, metrics, and train op."""
        self._build()
        # TF graph management
        self._loss()
        self._accuracy()
        self._minimize()

        # Predicted link probabilities for the positive pairs.
        self.p_probs = self.link_pred_layer.get_probs(self.outputs1,
                                                      self.outputs2)

    def _minimize(self):
        """Create the Adam minimization op for the loss."""
        self.opt_op = self.optimizer.minimize(self.loss)

    def _loss(self):
        """Skip-gram loss: sigmoid cross-entropy on positive affinities
        (label 1) and negative affinities (label 0), averaged per batch."""
        aff = tf.reduce_sum(tf.multiply(self.outputs1, self.outputs2),
                            1) + self.true_b
        neg_aff = tf.reduce_sum(tf.multiply(self.outputs1, self.neg_outputs),
                                1) + self.neg_b
        # xent_loss
        true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(aff), logits=aff)
        negative_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(neg_aff), logits=neg_aff)
        loss = tf.reduce_sum(true_xent) + tf.reduce_sum(negative_xent)

        self.loss = loss / tf.cast(self.batch_size, tf.float32)
        self.merged_loss = tf.summary.scalar('merged_loss', self.loss)

    def _accuracy(self):
        """Mean reciprocal rank (MRR) of the positive pair's affinity when
        ranked against the negative affinities."""
        # shape: [batch_size]
        aff = tf.reduce_sum(self.outputs1 * self.outputs2, axis=1)
        neg_aff = tf.reduce_sum(self.outputs1 * self.neg_outputs, axis=1)
        self.neg_aff = tf.expand_dims(neg_aff, axis=1)
        _aff = tf.expand_dims(aff, axis=1)
        # Positive affinity is appended as the LAST column of aff_all.
        self.aff_all = tf.concat(axis=1, values=[self.neg_aff, _aff])
        size = tf.shape(self.aff_all)[1]
        # Double top_k yields the 0-based rank of each column by affinity.
        _, indices_of_ranks = tf.nn.top_k(self.aff_all, k=size)
        _, self.ranks = tf.nn.top_k(-indices_of_ranks, k=size)
        # ranks[:, -1] is the rank of the positive pair; +1 makes it 1-based.
        self.mrr = tf.reduce_mean(
            tf.div(1.0, tf.cast(self.ranks[:, -1] + 1, tf.float32)))
        self.merged_mrr = tf.summary.scalar('merged_mrr', self.mrr)
# Example 5
class Graphsage(GeneralizedModel):
    """Unsupervised GraphSAGE: samples fixed-size multi-hop neighborhoods,
    aggregates them into node embeddings, and trains with a negative-sampling
    edge-prediction loss plus weight decay and gradient clipping.
    """

    def __init__(self, placeholders, features, adj, learning_rate,
            layer_infos, concat=True, aggregator_type="mean",
            model_size="small", identity_dim=0, args=None,
            **kwargs):
        """Set up inputs, features, dimensions, and build the graph.

        Args:
            placeholders: dict of TF placeholders ("batch1", "batch2",
                "batch3", "batch4", "batch_size", 'dropout').
            features: numpy array of node features, or None to use only the
                trainable identity embeddings.
            adj: adjacency info; its first dimension is the number of nodes.
            learning_rate: Adam learning rate.
            layer_infos: list of per-layer descriptors with num_samples,
                output_dim and neigh_sampler fields.
            concat: whether aggregators concatenate self- and neighbor-
                representations (doubles the output dimension).
            aggregator_type: "mean", "seq", "maxpool", "meanpool", or "gcn".
            model_size: "small" or "big" aggregator variant.
            identity_dim: if > 0, learn an identity embedding of this size
                per node (required when features is None).
            args: namespace providing weight_decay (used in _loss).
        """
        super(Graphsage, self).__init__(**kwargs)
        if aggregator_type == "mean":
            self.aggregator_cls = MeanAggregator
        elif aggregator_type == "seq":
            self.aggregator_cls = SeqAggregator
        elif aggregator_type == "maxpool":
            self.aggregator_cls = MaxPoolingAggregator
        elif aggregator_type == "meanpool":
            self.aggregator_cls = MeanPoolingAggregator
        elif aggregator_type == "gcn":
            self.aggregator_cls = GCNAggregator
        else:
            # BUG FIX: the original raised with `self.aggregator_cls`, which
            # is never assigned on this path, so the intended error was
            # masked by an AttributeError.
            raise Exception("Unknown aggregator: " + str(aggregator_type))

        # get info from placeholders...
        self.inputs1 = placeholders["batch1"]
        self.inputs2 = placeholders["batch2"]
        self.neg_samples = placeholders["batch3"]
        self.number = placeholders["batch4"]
        self.model_size = model_size
        self.lr = learning_rate
        self.adj_info = adj
        self.args = args

        if identity_dim > 0:
            # Trainable per-node identity embeddings, registered in "params"
            # so they receive weight decay and clipping in build()/_loss().
            self.embeds = tf.get_variable("node_embeddings", [adj.get_shape().as_list()[0], identity_dim])
            tf.add_to_collection("params",self.embeds)

        else:
            self.embeds = None
        if features is None:
            if identity_dim == 0:
                raise Exception("Must have a positive value for identity feature dimension if no input features given.")
            self.features = self.embeds
        else:
            # Fixed (non-trainable) input features, optionally concatenated
            # with the trainable identity embeddings.
            self.features = tf.Variable(tf.constant(features, dtype=tf.float32), trainable=False)
            if not self.embeds is None:
                self.features = tf.concat([self.embeds, self.features], axis=1)
        self.concat = concat

        # dims[0] is the input feature size; the rest are layer output dims.
        self.dims = [(0 if features is None else features.shape[1]) + identity_dim]
        self.dims.extend([layer_infos[i].output_dim for i in range(len(layer_infos))])
        self.batch_size = placeholders["batch_size"]
        self.placeholders = placeholders
        self.layer_infos = layer_infos

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

        self.build()

    def build(self):
        """Assemble the TF graph: forward pass, metrics, clipped-gradient
        train op, and link probabilities."""
        # TF graph management
        self._build()
        self._loss()
        self._accuracy()
        self.loss = self.loss / tf.cast(self.batch_size, tf.float32)
        self.merged_loss = tf.summary.scalar('merged_loss', self.loss)

        # Clip gradients to [-5, 5] before applying them.
        grads_and_vars = self.optimizer.compute_gradients(self.loss,var_list=self.params)
        clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                                  for grad, var in grads_and_vars]
        self.grad, _ = clipped_grads_and_vars[0]
        self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
        # define p_probs
        self.p_probs = self.link_pred_layer.get_probs(self.outputs1, self.outputs2)

    def _build(self):
        """Sample neighborhoods, aggregate them into embeddings for the
        positive pairs and negatives, and create the edge-prediction layer."""
        # Fan-out (number of sampled neighbors) per layer.
        num_samples = [layer_info.num_samples for layer_info in self.layer_infos]

        # Positive example pair embeddings (self.outputs1 / self.outputs2).
        self.samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)  # sampled node ids per hop
        self.samples2, support_sizes2 = self.sample(self.inputs2, self.layer_infos)
        # The aggregators built for inputs1 are reused for inputs2 and the
        # negatives so all branches share the same parameters.
        self.outputs1, self.aggregators = self.aggregate(self.samples1, [self.features], self.dims, num_samples,
                                                         support_sizes1, concat=self.concat, model_size=self.model_size)
        self.outputs2, _ = self.aggregate(self.samples2, [self.features], self.dims, num_samples,
                                          support_sizes2, aggregators=self.aggregators, concat=self.concat,
                                          model_size=self.model_size)

        # Negative example embeddings (their batch size is self.number).
        self.neg_sample, neg_support_sizes = self.sample(self.neg_samples, self.layer_infos,
                                                         self.number)
        self.neg_outputs, _ = self.aggregate(self.neg_sample, [self.features], self.dims, num_samples,
                                                 neg_support_sizes, batch_size=self.number,
                                                 aggregators=self.aggregators,
                                                 concat=self.concat, model_size=self.model_size)

        # Concatenating self- and neighbor-features doubles the output dim.
        dim_mult = 2 if self.concat else 1

        # unsupervised loss
        self.link_pred_layer = BipartiteEdgePredLayer(dim_mult * self.dims[-1],
                                                      dim_mult * self.dims[-1], self.placeholders, act=tf.nn.sigmoid,
                                                      bilinear_weights=False,
                                                      name='edge_predict')

        # Register aggregator weights in "params" for weight decay/clipping.
        for aggregator in self.aggregators:
            for var in aggregator.vars.values():
                tf.add_to_collection("params",var)

        # Row-wise L2 normalization of all output embeddings.
        self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)
        self.outputs2 = tf.nn.l2_normalize(self.outputs2, 1)
        self.neg_outputs = tf.nn.l2_normalize(self.neg_outputs, 1)

    def sample(self, inputs, layer_infos, batch_size=None):
        """ Sample neighbors to be the supportive fields for multi-layer convolutions.

        Args:
            inputs: batch inputs (node ids).
            layer_infos: per-layer descriptors with num_samples and
                neigh_sampler fields.
            batch_size: the number of inputs (different for batch inputs and negative samples).

        Returns:
            (samples, support_sizes): samples[k] is the flat id vector of
            nodes k hops away; support_sizes[k] is the number of support
            nodes per input at hop k.
        """

        if batch_size is None:
            batch_size = self.batch_size
        samples = [inputs]
        # size of convolution support at each layer per node
        support_size = 1
        support_sizes = [support_size]
        # Sample from the deepest layer_info outward.
        for k in range(len(layer_infos)):
            t = len(layer_infos) - k - 1
            support_size *= layer_infos[t].num_samples
            sampler = layer_infos[t].neigh_sampler  # uniformly sampling
            node = sampler((samples[k], layer_infos[t].num_samples))
            samples.append(tf.reshape(node, [support_size * batch_size,]))
            support_sizes.append(support_size)
        return samples, support_sizes

    def aggregate(self, samples, input_features, dims, num_samples, support_sizes, batch_size=None,
                  aggregators=None, name=None, concat=False, model_size="small"):
        """ At each layer, aggregate hidden representations of neighbors to compute the hidden representations
            at next layer.
        Args:
            samples: a list of samples of variable hops away for convolving at each layer of the
                network. Length is the number of layers + 1. Each is a vector of node indices.
            input_features: the input features for each sample of various hops away.
            dims: a list of dimensions of the hidden representations from the input layer to the
                final layer. Length is the number of layers + 1.
            num_samples: list of number of samples for each layer.
            support_sizes: the number of nodes to gather information from for each layer.
            batch_size: the number of inputs (different for batch inputs and negative samples).
            aggregators: previously built aggregators to reuse (parameter
                sharing); if None, new ones are created and returned.
        Returns:
            (hidden, aggregators): the hidden representation at the final
            layer for all nodes in batch, and the aggregators used.
        """

        if batch_size is None:
            batch_size = self.batch_size

        # length: number of layers + 1
        # Initial hidden state: raw features of every sampled node.
        hidden = [tf.nn.embedding_lookup(input_features, node_samples) for node_samples in samples]
        new_agg = aggregators is None
        if new_agg:
            aggregators = []
        # num_samples: each layer's sample counts, e.g. [10, 25]
        for layer in range(len(num_samples)):  # in each layer
            if new_agg:
                dim_mult = 2 if concat and (layer != 0) else 1
                # aggregator at current layer; the last layer uses a linear
                # (identity) activation.
                if layer == len(num_samples) - 1:
                    aggregator = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1], act=lambda x : x,
                            dropout=self.placeholders['dropout'],
                            name=name, concat=concat, model_size=model_size)
                else:
                    aggregator = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1],
                            dropout=self.placeholders['dropout'],
                            name=name, concat=concat, model_size=model_size)
                aggregators.append(aggregator)
            else:
                aggregator = aggregators[layer]
            # hidden representation at current layer for all support nodes that are various hops away
            next_hidden = []
            # as layer increases, the number of support nodes needed decreases
            for hop in range(len(num_samples) - layer):  # support node
                dim_mult = 2 if concat and (layer != 0) else 1
                neigh_dims = [batch_size * support_sizes[hop],
                              num_samples[len(num_samples) - hop - 1],
                              dim_mult*dims[layer]]
                h = aggregator((hidden[hop],
                                tf.reshape(hidden[hop + 1], neigh_dims)))
                next_hidden.append(h)
            hidden = next_hidden
        return hidden[0], aggregators

    def _loss(self):
        """Weight decay over everything in the "params" collection plus the
        edge-prediction (negative sampling) loss."""
        self.params = tf.get_collection("params")
        for var in self.params:
            self.loss += self.args.weight_decay * tf.nn.l2_loss(var)
        self.loss += self.link_pred_layer.loss(self.outputs1, self.outputs2, self.neg_outputs)

    def _accuracy(self):
        """Mean reciprocal rank (MRR) of the positive pair's affinity when
        ranked against the negative affinities."""
        # shape: [batch_size]
        aff = self.link_pred_layer.affinity(self.outputs1, self.outputs2)
        # shape : [batch_size x num_neg_samples]
        neg_aff = self.link_pred_layer.affinity(self.outputs1, self.neg_outputs)
        self.neg_aff = tf.expand_dims(neg_aff, axis=1)
        _aff = tf.expand_dims(aff, axis=1)
        # Positive affinity is appended as the LAST column of aff_all.
        self.aff_all = tf.concat(axis=1, values=[self.neg_aff, _aff])
        size = tf.shape(self.aff_all)[1]
        # Double top_k yields the 0-based rank of each column by affinity.
        _, indices_of_ranks = tf.nn.top_k(self.aff_all, k=size)
        _, self.ranks = tf.nn.top_k(-indices_of_ranks, k=size)
        # ranks[:, -1] is the rank of the positive pair; +1 makes it 1-based.
        self.mrr = tf.reduce_mean(tf.div(1.0, tf.cast(self.ranks[:, -1] + 1, tf.float32)))
        self.merged_mrr = tf.summary.scalar('merged_mrr', self.mrr)
# Example 6
class GCN(Model):
    """Unsupervised two-layer GCN encoder trained with a negative-sampling
    edge-prediction loss; metrics mirror the other models in this file.
    """

    def __init__(self,
                 placeholders,
                 input_dim,
                 embedding_dim=50,
                 lr=0.001,
                 args=None,
                 **kwargs):
        """Store inputs/dimensions, create the optimizer, and build the graph.

        Args:
            placeholders: dict of TF placeholders ('feats', "batch1",
                "batch2", "batch3", "batch4", 'batch_size').
            input_dim: dimensionality of the (sparse) input features.
            embedding_dim: output embedding size.
            lr: Adam learning rate.
            args: namespace providing hidden1 and weight_decay.
        """
        super(GCN, self).__init__(**kwargs)

        self.inputs = placeholders['feats']
        self.inputs1 = placeholders["batch1"]
        self.inputs2 = placeholders["batch2"]
        self.neg_samples = placeholders["batch3"]
        self.input_dim = input_dim
        self.output_dim = embedding_dim
        self.batch_size = placeholders['batch_size']
        self.number = placeholders["batch4"]
        self.placeholders = placeholders
        self.args = args
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        self.build()

    def _loss(self):
        """Weight decay on the first layer's weights plus the edge-prediction
        loss, averaged over the batch."""
        # Weight decay loss (only the first GraphConvolution layer's vars).
        for var in self.layers[0].vars.values():
            self.loss += self.args.weight_decay * tf.nn.l2_loss(var)

        self.loss += self.link_pred_layer.loss(self.outputs1, self.outputs2,
                                               self.neg_outputs)
        self.loss = self.loss / tf.cast(self.batch_size, tf.float32)
        self.merged_loss = tf.summary.scalar('merged_loss', self.loss)

    def _accuracy(self):
        """Mean reciprocal rank (MRR) of the positive pair's affinity when
        ranked against the negative affinities."""
        # shape: [batch_size]
        aff = self.link_pred_layer.affinity(self.outputs1, self.outputs2)
        neg_aff = self.link_pred_layer.affinity(self.outputs1,
                                                self.neg_outputs)
        self.neg_aff = tf.expand_dims(neg_aff, axis=1)
        _aff = tf.expand_dims(aff, axis=1)
        # Positive affinity is appended as the LAST column of aff_all.
        self.aff_all = tf.concat(axis=1, values=[self.neg_aff, _aff])
        size = tf.shape(self.aff_all)[1]
        # Double top_k yields the 0-based rank of each column by affinity.
        _, indices_of_ranks = tf.nn.top_k(self.aff_all, k=size)
        _, self.ranks = tf.nn.top_k(-indices_of_ranks, k=size)
        # ranks[:, -1] is the rank of the positive pair; +1 makes it 1-based.
        self.mrr = tf.reduce_mean(
            tf.div(1.0, tf.cast(self.ranks[:, -1] + 1, tf.float32)))
        self.merged_mrr = tf.summary.scalar('merged_mrr', self.mrr)

    def build(self):
        """ Wrapper for _build(): assembles the layer stack, embeddings,
        edge-prediction layer, metrics, and the train op. """
        with tf.variable_scope(self.name):
            self._build()

        # Build sequential layer model: feed the inputs through each layer,
        # keeping every intermediate activation.
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.embedding = self.activations[-1]  # embedding matrix
        # Row-wise L2 normalization of the embedding matrix.
        self.embedding = tf.nn.l2_normalize(self.embedding, 1)

        # Per-batch embeddings for the positive pairs and the negatives.
        self.outputs1 = tf.nn.embedding_lookup(self.embedding, self.inputs1)
        self.outputs2 = tf.nn.embedding_lookup(self.embedding, self.inputs2)
        self.neg_outputs = tf.nn.embedding_lookup(self.embedding,
                                                  self.neg_samples)

        # Layer that scores node pairs for the unsupervised (edge) loss.
        self.link_pred_layer = BipartiteEdgePredLayer(self.output_dim,
                                                      self.output_dim,
                                                      self.placeholders,
                                                      act=tf.nn.sigmoid,
                                                      bilinear_weights=False,
                                                      name='edge_predict')

        # Store model variables for easy access, keyed by variable name.
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope=self.name)
        self.vars = {var.name: var for var in variables}

        # Build metrics and the training op.
        self._loss()
        self._accuracy()
        self.opt_op = self.optimizer.minimize(self.loss)
        # Predicted link probabilities for the positive pairs.
        self.p_probs = self.link_pred_layer.get_probs(self.outputs1,
                                                      self.outputs2)

    def _build(self):
        """Stack two GraphConvolution layers: input -> hidden1 -> embedding,
        both with tanh activations and dropout; the first takes sparse input.
        """
        self.layers.append(
            GraphConvolution(input_dim=self.input_dim,
                             output_dim=self.args.hidden1,
                             placeholders=self.placeholders,
                             act=tf.nn.tanh,
                             dropout=True,
                             sparse_inputs=True,
                             logging=self.logging))

        self.layers.append(
            GraphConvolution(input_dim=self.args.hidden1,
                             output_dim=self.output_dim,
                             placeholders=self.placeholders,
                             act=tf.nn.tanh,
                             dropout=True,
                             logging=self.logging))