def _call_one_pair(self, input):
    """Return the row-wise squared Euclidean distance of one input pair.

    ``input[0]`` and ``input[1]`` are the two operands. Depending on
    ``self.norm`` they are first L2-normalized:

    * ``'row'``    -> ``axis=1`` (each row is normalized),
    * ``'col'``    -> ``axis=0`` (each column is normalized),
    * ``'global'`` -> no axis is passed to ``tf.nn.l2_normalize``.

    Any other ``self.norm`` value leaves the operands untouched.

    Returns:
        The sum over axis 1 of the squared differences, with the reduced
        axis kept (``keepdims=True``).

    Raises:
        AssertionError: if the static shape of the result does not match
            the expectations checked below.
    """
    left, right = input[0], input[1]

    if self.norm in ('row', 'col'):
        norm_axis = 1 if self.norm == 'row' else 0
        left = tf.nn.l2_normalize(left, axis=norm_axis)
        right = tf.nn.l2_normalize(right, axis=norm_axis)
    elif self.norm == 'global':
        left = tf.nn.l2_normalize(left)
        right = tf.nn.l2_normalize(right)

    # Sum along the columns so that every row yields a single distance.
    delta = left - right
    dist = tf.reduce_sum(delta ** 2, axis=1, keepdims=True)

    static_dims = dist.get_shape().as_list()
    is_batched = (static_dims[0] == FLAGS.batch_size
                  and FLAGS.batch_size != 1)
    if is_batched:
        # Per the original note: the input is a list of two BS-by-D
        # graph-level embedding matrices, only expected for the
        # transductive model.
        assert FLAGS.model == 'siamese_regression_transductive'
        assert is_transductive()
    else:
        assert static_dims[0] == 1
    assert static_dims[1] == 1
    return dist
def _call_one_pair(self, input):
    """Return a similarity output for one input pair.

    ``input[0]`` and ``input[1]`` must have equal shapes. They are passed
    to ``interact_two_sets_of_vectors`` with a single interaction whose
    weight is the identity matrix of size ``emb_dim`` (the second static
    dimension of the first operand) and with activation ``self.act``.

    Returns:
        * ``self.output_dim == 2``: ``[sim_score, 1 - sim_score]``
          concatenated along axis 1.
        * ``self.output_dim == 1``: ``sim_score`` reshaped to ``[-1, 1]``.

    Raises:
        AssertionError: on mismatching operand shapes, an unexpected
            leading dimension of the score, or an unsupported
            ``self.output_dim``.
    """
    x_1 = input[0]
    x_2 = input[1]
    assert x_1.shape == x_2.shape
    emb_dim = x_1.get_shape().as_list()[1]

    sim_score = interact_two_sets_of_vectors(
        x_1, x_2, 1,  # interact only once
        W=[tf.eye(emb_dim)], act=self.act)

    s = sim_score.get_shape().as_list()
    if s[0] == FLAGS.batch_size and FLAGS.batch_size != 1:
        # Per the original note: a list of two BS-by-D graph-level
        # embedding matrices, only expected for the transductive model.
        assert FLAGS.model == 'siamese_regression_transductive'
        assert is_transductive()
    else:
        assert s[0] == 1

    if self.output_dim == 2:
        output = tf.concat([sim_score, 1 - sim_score], 1)
        # Check only the static column count. The previous comparison of
        # the whole shape against (-1, 2) could never succeed, because -1
        # is not a valid static dimension for a TensorShape.
        assert output.get_shape().as_list()[1] == 2
    else:
        assert self.output_dim == 1
        output = tf.reshape(sim_score, [-1, 1])
    return output
def _supply_laplacians_etc_to_feed_dict(self, feed_dict, pairs, tvt):
    """Fill ``feed_dict`` with the inputs of every graph pair and return it.

    Transductive mode: only the ``global_id`` of each graph is fed, as two
    lists keyed by the ``gemb_lookup_ids_1`` / ``gemb_lookup_ids_2``
    placeholders.

    Otherwise, for the i-th pair the node inputs, their non-zero counts
    and, for every coarsening level, the Laplacians of both graphs are fed
    into the i-th entry of the matching placeholder collections.

    Args:
        feed_dict: mapping that is updated in place.
        pairs: iterable of ``(g1, g2)`` graph pairs.
        tvt: forwarded unchanged to ``self._get_plhdr``.

    Returns:
        The same ``feed_dict`` object.
    """
    if is_transductive():
        id_pairs = [(g1.global_id, g2.global_id) for (g1, g2) in pairs]
        feed_dict[self._get_plhdr('gemb_lookup_ids_1', tvt)] = \
            [first_id for first_id, _ in id_pairs]
        feed_dict[self._get_plhdr('gemb_lookup_ids_2', tvt)] = \
            [second_id for _, second_id in id_pairs]
        return feed_dict

    num_laplacians = 1
    for idx, (g1, g2) in enumerate(pairs):
        for key, graph in (('features_1', g1), ('features_2', g2)):
            feed_dict[self._get_plhdr(key, tvt)[idx]] = \
                graph.get_node_inputs()
        for key, graph in (('num_nonzero_1', g1), ('num_nonzero_2', g2)):
            feed_dict[self._get_plhdr(key, tvt)[idx]] = \
                graph.get_node_inputs_num_nonzero()
        for level in range(get_coarsen_level()):
            for k in range(num_laplacians):
                for key, graph in (('laplacians_1', g1),
                                   ('laplacians_2', g2)):
                    feed_dict[self._get_plhdr(key, tvt)[idx][level][k]] = \
                        graph.get_laplacians(level)[k]
    return feed_dict
def _get_ins(self, layer, tvt):
    """Return the list of inputs for ``layer``.

    In transductive mode the work is delegated to
    ``self._get_ins_for_transductive_model``. Otherwise the layer must be
    a ``GraphConvolution`` or ``GraphConvolutionAttention`` (checked by
    class name) and the result is the ``features_1`` placeholders followed
    by the ``features_2`` placeholders.

    Raises:
        AssertionError: if the layer has an unexpected class name in
            non-transductive mode.
    """
    if is_transductive():
        return self._get_ins_for_transductive_model(layer, tvt)

    layer_kind = layer.__class__.__name__
    assert layer_kind in ('GraphConvolution', 'GraphConvolutionAttention')
    return list(self._get_plhdr('features_1', tvt)
                + self._get_plhdr('features_2', tvt))
def __init__(self, dataset):
    """Record the configuration, initialize the parent and print sizes.

    Args:
        dataset: stored as ``self.dataset``.
    """
    # The attributes set below determine the save file name, so they are
    # assigned in a fixed order before ``get_name()`` is called.
    self.dataset = dataset
    copied_flags = (
        'valid_percentage',
        'node_feat_name',
        'node_feat_encoder',
        'ordering',
        'coarsening',
        'supersource',
        'random_walk',
        'laplacian',
    )
    for flag_name in copied_flags:
        setattr(self, flag_name, getattr(FLAGS, flag_name))
    if is_transductive():
        self.transductive_info = 'trans'

    super().__init__(self.get_name())

    summary = '{} train graphs; {} validation graphs; {} test graphs'
    print(summary.format(
        len(self.train_gs), len(self.val_gs), len(self.test_gs)))
def _load_train_triples(self, data, ds_calc):
    """Collect the training triples and wrap them in a SelfShuffleList.

    Train-vs-train pairs are always loaded. In transductive mode
    val-vs-train, val-vs-val, test-vs-train and test-vs-val pairs are
    loaded as well, in that order.

    Args:
        data: object exposing ``train_gs``, ``val_gs`` and ``test_gs``.
        ds_calc: forwarded unchanged to ``self._load_real_pairs``.
    """
    triples = self._load_real_pairs(
        data.train_gs, data.train_gs, 'train', 'train', [], ds_calc)

    if is_transductive():
        # Load more pairs to better train the model, since it directly
        # optimizes over the embeddings.
        extra_sources = (
            (data.val_gs, data.train_gs, 'val', 'train'),
            (data.val_gs, data.val_gs, 'val', 'val'),
            (data.test_gs, data.train_gs, 'test', 'train'),
            (data.test_gs, data.val_gs, 'test', 'val'),
        )
        for gs_a, gs_b, tag_a, tag_b in extra_sources:
            triples = self._load_real_pairs(
                gs_a, gs_b, tag_a, tag_b, triples, ds_calc)

    return SelfShuffleList(triples)
def __init__(self, input_dim, data, dist_sim_calculator):
    """Create placeholders, build the model and load the training triples.

    Args:
        input_dim: stored as ``self.input_dim`` and printed.
        data: used for the transductive placeholders and for loading the
            training triples.
        dist_sim_calculator: forwarded to ``self._load_train_triples``.
    """
    self.input_dim = input_dim
    print('original_input_dim', self.input_dim)

    batch_size = FLAGS.batch_size
    if not is_transductive():
        self._create_basic_placeholders(
            batch_size, batch_size, level=get_coarsen_level())
    else:
        self._create_transductive_gembs_placeholders(
            data, batch_size, batch_size)

    # Ground-truth placeholders: one row per pair in a training batch,
    # a single row for validation/testing.
    self.train_y_true = tf.placeholder(
        tf.float32, shape=(FLAGS.batch_size, 1))
    self.val_test_y_true = tf.placeholder(tf.float32, shape=(1, 1))

    # Build the model.
    super(SiameseRegressionModel, self).__init__()

    kernel_name = FLAGS.ds_kernel
    yeta = get_flags('yeta')
    scale = get_flags('scale')
    self.ds_kernel = create_ds_kernel(kernel_name, yeta, scale)

    self.train_triples = self._load_train_triples(
        data, dist_sim_calculator)
def init(self):
    """Build the object from scratch.

    Meant to be used only when no saved version already exists. Loads the
    train and test graphs of ``self.dataset``, splits the train graphs
    into train/validation parts, fits the node-feature and graph-label
    encoders on all train and test graphs, converts every split with
    ``create_model_gs`` and, in transductive mode, assigns global ids.

    Raises:
        AssertionError: if the train and validation parts do not add up
            to the number of originally loaded train graphs.
    """
    raw_train = load_data(self.dataset, train=True)
    train_part, val_part = self._train_val_split(raw_train)
    test_part = load_data(self.dataset, train=False).graphs

    # NOTE: the local graph lists and the self.<split>_gs attributes can
    # hold different graphs because of the supersource option, which turns
    # a graph into a DiGraph and adds a node, fundamentally changing it.
    # Treat the self.<split>_gs attributes as the source of truth.
    self.node_feat_encoder = self._create_node_feature_encoder(
        raw_train.graphs + test_part)
    self.graph_label_encoder = GraphLabelOneHotEncoder(
        raw_train.graphs + test_part)

    self._check_graphs_num(test_part, 'test')

    self.train_gs = self.create_model_gs(train_part, 'train')
    self.val_gs = self.create_model_gs(val_part, 'val')
    self.test_gs = self.create_model_gs(test_part, 'test')

    if is_transductive():
        self._assign_global_ids()

    # The split must neither drop nor duplicate any training graph.
    assert len(train_part) + len(val_part) == len(raw_train.graphs)