import tensorflow as tf
from collections import defaultdict

# Assumed import path: these variants follow GAT-style codebases, which keep the
# attention layers in a local `layers` module (e.g. `from utils import layers`).
from utils import layers


def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    # Learned embedding of the raw features before the first attention layer.
    embedding = layers.embedding(inputs, embedding_dim=64)

    attns = []
    for _ in range(n_heads[0]):
        attns.append(layers.attn_head(embedding, bias_mat=bias_mat,
                                      out_sz=hid_units[0], activation=activation,
                                      in_drop=ffd_drop, coef_drop=attn_drop,
                                      residual=False))
    h_1 = tf.concat(attns, axis=-1)

    for i in range(1, len(hid_units)):
        h_old = h_1
        attns = []
        for _ in range(n_heads[i]):
            attns.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                          out_sz=hid_units[i], activation=activation,
                                          in_drop=ffd_drop, coef_drop=attn_drop,
                                          residual=residual))
        h_1 = tf.concat(attns, axis=-1)

    out = []
    for i in range(n_heads[-1]):
        out.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                    out_sz=nb_classes, activation=lambda x: x,
                                    in_drop=ffd_drop, coef_drop=attn_drop,
                                    residual=False))
    logits = tf.add_n(out) / n_heads[-1]
    return logits

def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    # First layer: the outputs of the heads are concatenated.
    attns = []
    for _ in range(n_heads[0]):
        attns.append(layers.attn_head(inputs, bias_mat=bias_mat,
                                      out_sz=hid_units[0], activation=activation,
                                      in_drop=ffd_drop, coef_drop=attn_drop,
                                      residual=False))
    h_1 = tf.concat(attns, axis=-1)

    # Hidden layers: the outputs of the heads are concatenated.
    for i in range(1, len(hid_units)):
        h_old = h_1
        attns = []
        for _ in range(n_heads[i]):
            attns.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                          out_sz=hid_units[i], activation=activation,
                                          in_drop=ffd_drop, coef_drop=attn_drop,
                                          residual=residual))
        h_1 = tf.concat(attns, axis=-1)

    # Output layer: as described in the paper, the heads are averaged here
    # rather than concatenated.
    out = []
    for i in range(n_heads[-1]):
        out.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                    out_sz=nb_classes, activation=lambda x: x,
                                    in_drop=ffd_drop, coef_drop=attn_drop,
                                    residual=False))
    logits = tf.add_n(out) / n_heads[-1]
    return logits

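# A minimal usage sketch for the variant above, not taken from the original
# sources. The hyperparameters follow the Cora setup quoted in the comments of a
# later variant (hid_units=[8], n_heads=[8, 1], dropout 0.6, batch dimension 1);
# nb_nodes, ft_size and nb_classes are assumed to be defined elsewhere.
ftr_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, ft_size))    # node features
bias_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, nb_nodes))  # attention bias
is_train = tf.placeholder(tf.bool, shape=())

logits = inference(ftr_in, nb_classes, nb_nodes, is_train,
                   attn_drop=0.6, ffd_drop=0.6,
                   bias_mat=bias_in,
                   hid_units=[8], n_heads=[8, 1],
                   activation=tf.nn.elu, residual=False)
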
def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False,
              simplify=False):
    # First attention layer.
    attns = []
    for _ in range(n_heads[0]):  # e.g. 4 heads
        attns.append(
            layers.attn_head(inputs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False, simplify=simplify))
    h_1 = tf.concat(attns, axis=-1)

    # Hidden attention layers.
    for i in range(1, len(hid_units)):
        attns = []
        for _ in range(n_heads[i]):  # e.g. 4 heads
            attns.append(
                layers.attn_head(h_1, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=residual, simplify=simplify))
        h_1 = tf.concat(attns, axis=-1)

    # Output layer (never simplified).
    out = []
    for i in range(n_heads[-1]):
        out.append(
            layers.attn_head(h_1, bias_mat=bias_mat,
                             out_sz=nb_classes, activation=lambda x: x,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False, simplify=False))
    logits = tf.add_n(out) / n_heads[-1]
    return logits

def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    # First multi-head layer (n_heads[0] heads; output width = n_heads[0] * hid_units[0]).
    attns = []
    for _ in range(n_heads[0]):
        attns.append(
            layers.attn_head(inputs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))
    h_1 = tf.concat(attns, axis=-1)

    # Remaining multi-head layers. n_heads has one entry per layer including the
    # output layer, while hid_units covers only the input and hidden layers:
    #   index       0      1  2  3  ...  -2  -1
    #   n_heads     √      √  √  √  ...  √   √
    #   hid_units   √      √  √  √  ...  √
    #   layer       input  hidden layers     output
    for i in range(1, len(hid_units)):
        h_old = h_1
        attns = []
        for _ in range(n_heads[i]):
            attns.append(
                layers.attn_head(h_1, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=residual))
        h_1 = tf.concat(attns, axis=-1)

    out = []
    for i in range(n_heads[-1]):
        out.append(
            layers.attn_head(h_1, bias_mat=bias_mat,
                             out_sz=nb_classes,
                             activation=lambda x: x,  # linear activation
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))
    logits = tf.add_n(out) / n_heads[-1]
    return logits

def inference(inputs, adjs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    attns = []
    for j in range(n_heads[0]):
        attns.append(
            layers.attn_head(inputs, adjs=adjs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False, name='layer1_' + str(j)))
    h_1 = tf.concat(attns, axis=-1)

    for i in range(1, len(hid_units)):
        h_old = h_1
        attns = []
        for j in range(n_heads[i]):
            # Unique name per layer and head, so stacked hidden layers do not
            # collide in the variable scope.
            attns.append(
                layers.attn_head(h_1, adjs=adjs, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=residual,
                                 name='layer{}_{}'.format(i + 1, j)))
        h_1 = tf.concat(attns, axis=-1)

    out = []
    for i in range(n_heads[-1]):
        # Unique name per output head, so multiple output heads do not collide.
        out.append(
            layers.attn_head(h_1, adjs=adjs, bias_mat=bias_mat,
                             out_sz=nb_classes, activation=lambda x: x,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False, name='layer3_' + str(i)))
    logits = tf.add_n(out) / n_heads[-1]
    return logits

def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    # Example call (Cora):
    #   ftr_in, nb_classes, nb_nodes, is_train,
    #   attn_drop=0.6, ffd_drop=0.6,
    #   bias_mat=bias_in,
    #   hid_units=[8], n_heads=[8, 1],
    #   residual=False, activation=nonlinearity
    attns = []
    for _ in range(n_heads[0]):
        attns.append(
            layers.attn_head(inputs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))  # each head: (1, 2708, 8)
    h_1 = tf.concat(attns, axis=-1)  # (1, 2708, 64)

    for i in range(1, len(hid_units)):
        # h_old = h_1
        attns = []
        for _ in range(n_heads[i]):
            attns.append(
                layers.attn_head(h_1, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=residual))
        h_1 = tf.concat(attns, axis=-1)

    out = []
    for i in range(n_heads[-1]):
        out.append(
            layers.attn_head(h_1, bias_mat=bias_mat,
                             out_sz=nb_classes, activation=lambda x: x,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))
    logits = tf.add_n(out) / n_heads[-1]  # (1, 2708, 7)
    return logits

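# A sketch of how the bias_mat fed into these functions is typically derived from
# an adjacency matrix; this is an assumption about the surrounding pipeline, not
# part of the variant above. Connected pairs get bias 0 and unreachable pairs a
# large negative value, so the softmaxed attention coefficients computed inside
# layers.attn_head vanish for non-edges.
import numpy as np

def adj_to_bias_sketch(adj):
    # adj: (batch, N, N) adjacency with self-loops; returns an additive attention bias
    return -1e9 * (1.0 - (adj > 0.0).astype(np.float32))
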
def inference(self, inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    attns = []
    for _ in range(n_heads[0]):
        attns.append(
            layers.attn_head(inputs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))
    h_1 = tf.concat(attns, axis=-1)

    for i in range(1, len(hid_units)):
        h_old = h_1
        attns = []
        for _ in range(n_heads[i]):
            attns.append(
                layers.attn_head(h_1, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=residual))
        h_1 = tf.concat(attns, axis=-1)
    print("h_1", h_1)

    out = []
    for i in range(n_heads[-1]):
        out.append(
            layers.attn_head(h_1, bias_mat=bias_mat,
                             out_sz=nb_classes,
                             activation=lambda x: x,  # linear; attn_head calls activation(...)
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))
    # This variant concatenates the output heads instead of averaging them:
    # logits = tf.add_n(out) / n_heads[-1]
    logits = tf.concat(out, axis=-1)
    return logits

def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat_list, hid_units, n_heads, activation=tf.nn.elu,
              residual=False, mp_att_size=128, return_coef=False):
    # return_coef defaults to False so attn_head returns plain tensors.
    embed_list = []
    # coef_list = []
    for bias_mat in bias_mat_list:
        attns = []
        head_coef_list = []
        for _ in range(n_heads[0]):
            attns.append(layers.attn_head(inputs, bias_mat=bias_mat,
                                          out_sz=hid_units[0], activation=activation,
                                          in_drop=ffd_drop, coef_drop=attn_drop,
                                          residual=False, return_coef=return_coef))
        h_1 = tf.concat(attns, axis=-1)
        for i in range(1, len(hid_units)):
            h_old = h_1
            attns = []
            for _ in range(n_heads[i]):
                attns.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                              out_sz=hid_units[i], activation=activation,
                                              in_drop=ffd_drop, coef_drop=attn_drop,
                                              residual=residual))
            h_1 = tf.concat(attns, axis=-1)
        embed_list.append(tf.expand_dims(tf.squeeze(h_1), axis=1))

    # Meta-path-level attention over the per-graph embeddings.
    multi_embed = tf.concat(embed_list, axis=1)
    final_embed, att_val = layers.SimpleAttLayer(multi_embed, mp_att_size,
                                                 time_major=False,
                                                 return_alphas=True)

    # Last layer for classification.
    out = []
    for i in range(n_heads[-1]):
        out.append(tf.layers.dense(final_embed, nb_classes, activation=None))
        # out.append(layers.attn_head(h_1, bias_mat=bias_mat,
        #                             out_sz=nb_classes, activation=lambda x: x,
        #                             in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
    logits = tf.add_n(out) / n_heads[-1]
    logits = tf.expand_dims(logits, axis=0)
    # if return_coef:
    #     return logits, final_embed, att_val, coef_list
    # else:
    return logits, final_embed, att_val

def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    # First attention layer.
    attns = []
    for _ in range(n_heads[0]):
        attns.append(
            layers.attn_head(inputs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))
    h_1 = tf.concat(attns, axis=-1)

    # Hidden attention layers, each attending over the previous layer's output.
    for i in range(1, len(hid_units)):
        attns = []
        for _ in range(n_heads[i]):
            attns.append(
                layers.attn_head(h_1, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=residual))
        h_1 = tf.concat(attns, axis=-1)

    out = []
    for i in range(n_heads[-1]):
        out.append(
            layers.attn_head(h_1, bias_mat=bias_mat,
                             out_sz=nb_classes, activation=lambda x: x,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False))
    logits = tf.add_n(out) / n_heads[-1]
    return logits

def inference(inputs_list, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              keep_prob, bias_mat_list, hid_units, n_heads,
              activation=tf.nn.elu, residual=False):
    h_1 = []  # first-layer attention result, one entry per graph
    for inputs, bias_mat in zip(inputs_list, bias_mat_list):
        # With projection: map each graph's features into a shared 300-dim space.
        w_meta = tf.Variable(tf.random_normal([inputs.shape[-1], 300], stddev=0.1))
        x = tf.tensordot(inputs, w_meta, axes=1)
        # Without projection:
        # x = inputs
        attns = []
        for _ in range(n_heads[0]):  # n_heads[0] = 8
            attns.append(layers.attn_head(x, bias_mat=bias_mat,
                                          out_sz=hid_units[0], activation=activation,
                                          in_drop=ffd_drop, coef_drop=attn_drop,
                                          residual=False))
        h_1.append(tf.concat(attns, axis=-1))

    # Combine the per-graph embeddings by concatenation ...
    h_2 = tf.concat(h_1, axis=-1)
    # ... or, alternatively, by summation:
    # h_2 = tf.zeros_like(h_1[0], dtype=tf.float32)
    # for h in h_1:
    #     h_2 = h_2 + h
    # h_2 = tf.nn.dropout(h_2, keep_prob)

    out = tf.layers.dense(h_2, nb_classes, activation=None)
    # out = tf.layers.dense(h_2, nb_classes, activation=tf.nn.elu)
    return out, h_2

def encoder(inputs, nb_nodes, training, attn_drop, ffd_drop,
            bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    attns = []
    for _ in range(n_heads[0]):
        attn_temp, coefs = layers.attn_head(inputs, bias_mat=bias_mat,
                                            out_sz=hid_units[0], activation=activation,
                                            in_drop=ffd_drop, coef_drop=attn_drop,
                                            residual=False)
        # Feed each head's output (with a restored batch axis) to the next head,
        # so the heads are chained rather than applied in parallel.
        inputs = attn_temp[tf.newaxis]
        attns.append(attn_temp)
    h_1 = tf.concat(attns, axis=-1)
    # coefs holds the attention coefficients of the last head only.
    return h_1, coefs

def inference(inputs_list, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat_list, hid_units, n_heads, features, labels,
              activation=tf.nn.elu, residual=False, mp_att_size=200,
              feature_size=100):
    # Metric learning: project the first input onto a feature_size-dim space.
    temp = inputs_list[0]
    # temp2 = tf.reduce_sum(temp, 0)
    # print("temp2 check:", temp2)
    MetricInputs = tf.layers.dense(temp, feature_size, activation=None)
    # ExpendMetricInputs = tf.expand_dims(MetricInputs, 0)
    print("MetricInputs check:", MetricInputs)
    inputs_list = [MetricInputs]

    # tests
    mp_att_size = 200

    embed_list = []
    for inputs, bias_mat in zip(inputs_list, bias_mat_list):
        attns = []
        jhy_embeds = []
        for _ in range(n_heads[0]):
            attns.append(layers.attn_head(inputs, bias_mat=bias_mat,
                                          out_sz=hid_units[0], activation=activation,
                                          in_drop=ffd_drop, coef_drop=attn_drop,
                                          residual=False))
        h_1 = tf.concat(attns, axis=-1)
        for i in range(1, len(hid_units)):
            h_old = h_1
            attns = []
            for _ in range(n_heads[i]):
                attns.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                              out_sz=hid_units[i], activation=activation,
                                              in_drop=ffd_drop, coef_drop=attn_drop,
                                              residual=residual))
            h_1 = tf.concat(attns, axis=-1)
        embed_list.append(tf.expand_dims(tf.squeeze(h_1), axis=1))

    multi_embed = tf.concat(embed_list, axis=1)
    print("multi_embed:", multi_embed, ", mp_att_size:", mp_att_size)
    final_embed, att_val = layers.SimpleAttLayer(multi_embed, mp_att_size,
                                                 time_major=False,
                                                 return_alphas=True)

    # Class centers in the learned embedding space.
    centers_embed = HeteGAT_multi.getCenters(len(set(labels)), feature_size,
                                             labels, final_embed)
    centers_embed = tf.transpose(centers_embed)

    out = []
    for i in range(n_heads[-1]):
        out.append(tf.layers.dense(final_embed, nb_classes, activation=None))
        # out.append(layers.attn_head(h_1, bias_mat=bias_mat,
        #                             out_sz=nb_classes, activation=lambda x: x,
        #                             in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
    logits = tf.add_n(out) / n_heads[-1]
    logits = tf.expand_dims(logits, axis=0)

    test_final_embed = tf.reduce_sum(MetricInputs, 0)
    return logits, final_embed, att_val, centers_embed, test_final_embed

def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False,
              export_dict=None):
    # Avoid a mutable default argument: a defaultdict created in the signature
    # would be shared across all calls.
    if export_dict is None:
        export_dict = defaultdict(dict)

    # Input attention layer
    attns = []
    for head_i in range(n_heads[0]):
        scope_name = f"attn_L1_H{head_i}"
        attns.append(
            layers.attn_head(scope_name, inputs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False, export_dict=export_dict[scope_name]))
    h_1 = tf.concat(attns, axis=-1)

    # Middle attention layers
    for i in range(1, len(hid_units)):
        h_old = h_1
        attns = []
        for head_i in range(n_heads[i]):
            scope_name = f"attn_L{i + 1}_H{head_i}"
            attns.append(
                layers.attn_head(scope_name, h_1, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=residual, export_dict=export_dict[scope_name]))
        h_1 = tf.concat(attns, axis=-1)

    # Last attention layer
    out = []
    for i in range(n_heads[-1]):
        scope_name = f"attn_L{len(n_heads)}_H{i}"
        out.append(
            layers.attn_head(scope_name, h_1, bias_mat=bias_mat,
                             out_sz=nb_classes, activation=lambda x: x,
                             in_drop=ffd_drop, coef_drop=attn_drop,
                             residual=False, export_dict=export_dict[scope_name]))
    logits = tf.add_n(out) / n_heads[-1]
    return logits

def inference(inputs_list, attn_drop, ffd_drop, bias_mat_list, hid_units, n_heads,
              activation=tf.nn.elu, residual=False, mp_att_size=128):
    embed_list = []
    for inputs, bias_mat in zip(inputs_list, bias_mat_list):
        attns = []
        jhy_embeds = []
        for _ in range(n_heads[0]):  # 8
            attns.append(
                layers.attn_head(inputs, bias_mat=bias_mat,
                                 out_sz=hid_units[0], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop,
                                 residual=False))
        h_1 = tf.concat(attns, axis=-1)  # 1 * X * (8*8)
        for i in range(1, len(hid_units)):  # range(1, 1)
            h_old = h_1
            attns = []
            for _ in range(n_heads[i]):
                attns.append(
                    layers.attn_head(h_1, bias_mat=bias_mat,
                                     out_sz=hid_units[i], activation=activation,
                                     in_drop=ffd_drop, coef_drop=attn_drop,
                                     residual=residual))
            h_1 = tf.concat(attns, axis=-1)
        # h_1: 1*X*64 ==> X*1*64
        # embed_list.append(tf.expand_dims(tf.squeeze(h_1), axis=1))
        # changed API because X != 1:
        embed_list.append(tf.transpose(h_1, [1, 0, 2]))

    # len(inputs_list) tensors of shape X*1*64 ==> X*L*64
    multi_embed = tf.concat(embed_list, axis=1)
    print("inference/multi_embed is {}".format(multi_embed))
    final_embed, att_val = layers.SimpleAttLayer(multi_embed, mp_att_size,
                                                 time_major=False,
                                                 return_alphas=True)

    # Classification head, removed in this variant:
    # out = []
    # for i in range(n_heads[-1]):
    #     out.append(tf.layers.dense(final_embed, nb_classes, activation=None))
    # logits = tf.add_n(out) / n_heads[-1]
    # logits = tf.expand_dims(logits, axis=0)
    # return logits, final_embed, att_val
    return final_embed, att_val

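# A minimal usage sketch for the meta-path variant above, not taken from the
# original sources; nb_nodes, ft_size and n_metapaths are assumed names, and the
# hyperparameters reuse the hid_units=[8], n_heads=[8, 1] setup quoted earlier.
ftr_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, ft_size))
bias_in_list = [tf.placeholder(tf.float32, shape=(1, nb_nodes, nb_nodes))
                for _ in range(n_metapaths)]

final_embed, att_val = inference([ftr_in] * n_metapaths,
                                 attn_drop=0.6, ffd_drop=0.6,
                                 bias_mat_list=bias_in_list,
                                 hid_units=[8], n_heads=[8, 1])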