def test_compute_embedding():
    # Set up a simple 3-node graph with arbitrary values
    test_edges = torch.LongTensor([[1, 0], [1, 2]])
    test_adj = torch.sparse.IntTensor(test_edges.t(),
                                      torch.ones(test_edges.size(0)))
    test_nodes = torch.FloatTensor([[1, 1], [0, 1], [2, 3]])
    test_net = GAT(input_size=2, output_size=1, K=1, adj=test_adj)
    # Explicitly set the parameters for testing
    test_net.W = torch.nn.Parameter(torch.FloatTensor([[[2, 2], [3, 2]]]))
    test_net.a = torch.nn.Parameter(torch.FloatTensor([[[3, 2, 1, 1]]]))
    embedding = test_net.compute_embedding(test_nodes, i=1, k=0)
    # Compare to the expected value; results were calculated by hand
    expected_attention = F.softmax(torch.FloatTensor([19, 32]), dim=0).view(2, 1)
    expected_output = sum(torch.FloatTensor([[4, 5], [10, 12]]) * expected_attention)
    # torch.allclose compares with a tolerance
    try:
        assert torch.allclose(embedding, expected_output)
        print("SUCCESS: embedding matches expected values")
    except Exception as e:
        print("FAILED: ", e)
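# A hedged, standalone re-derivation of the hand-computed constants used in
# test_compute_embedding() above, assuming the usual GAT attention score
# e_ij = a . [W h_i || W h_j]. Everything here is plain torch; no GAT class is
# required, and the layout of W and a is an assumption matching the test.
import torch
import torch.nn.functional as F

W = torch.FloatTensor([[2, 2], [3, 2]])           # single-head weight
a = torch.FloatTensor([3, 2, 1, 1])               # attention vector
h = torch.FloatTensor([[1, 1], [0, 1], [2, 3]])   # node features

Wh = h @ W.t()  # Wh[0]=[4,5], Wh[1]=[2,2], Wh[2]=[10,12]
# node 1 attends to its neighbours 0 and 2
e = torch.stack([a @ torch.cat([Wh[1], Wh[j]]) for j in (0, 2)])
assert e.tolist() == [19.0, 32.0]  # the logits hard-coded in the test
attention = F.softmax(e, dim=0).view(2, 1)
expected = (torch.stack([Wh[0], Wh[2]]) * attention).sum(dim=0)
print(expected)  # the value the test compares the embedding against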
def __init__(self, args, convolution_method):
    super(Net, self).__init__()
    self.hierarchical_num = args.hierarchical_num
    embed_method = convolution_method
    self.embeds = nn.ModuleList()
    if embed_method == 'GCN':
        self.embed = GCNBlock(args.input_dim, args.hidden_dim, args.bn,
                              args.gcn_res, args.gcn_norm, args.dropout,
                              args.relu)
        for i in range(self.hierarchical_num):
            self.embeds.append(
                GCNBlock(args.hidden_dim, args.hidden_dim, args.bn,
                         args.gcn_res, args.gcn_norm, args.dropout, args.relu))
    elif embed_method == 'GAT':
        self.embed = GAT(args.input_dim, args.hidden_dim, args.dropout, 0.2, 2)
        for i in range(self.hierarchical_num):
            self.embeds.append(
                GAT(args.hidden_dim, args.hidden_dim, args.dropout, 0.2, 2))
    elif embed_method == 'GraphSage':
        self.embed = SageGCN(args.input_dim, args.hidden_dim)
        for i in range(self.hierarchical_num):
            self.embeds.append(SageGCN(args.hidden_dim, args.hidden_dim))
    self.muchPools = nn.ModuleList()
    for i in range(self.hierarchical_num):
        self.muchPools.append(MuchPool(args))
    if args.readout == 'mean':
        self.readout = self.mean_readout
    elif args.readout == 'sum':
        self.readout = self.sum_readout
    self.mlpc = MLPClassifier(input_size=args.hidden_dim,
                              hidden_size=args.hidden_dim,
                              num_class=args.num_class)
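# Hedged sketch: Net.__init__ above binds self.readout to self.mean_readout or
# self.sum_readout, which are not shown in this snippet. Plausible definitions
# (the dim=1 node axis is an assumption about the embedding layout):
def mean_readout(self, node_embeddings):
    # collapse per-node embeddings into one graph-level vector by averaging
    return node_embeddings.mean(dim=1)

def sum_readout(self, node_embeddings):
    # collapse per-node embeddings into one graph-level vector by summing
    return node_embeddings.sum(dim=1)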
def __init__(self, vectorizer):
    super().__init__()
    self.vectorizer = vectorizer
    self.gat1 = GAT(len(vectorizer.get_feature_names()), output_size=8, K=8)
    # 64 = 8 heads * 8 features, assuming the K=8 head outputs are concatenated
    self.gat2 = GATFinal(64, output_size=2, K=1)
def build_model(model_key, dataset, g, in_feats, n_classes):
    """Returns a model instance based on the --model command-line arg and dataset"""
    if model_key == 'MLP':
        return MLP(in_feats, 64, n_classes, 1, F.relu, 0.5)
    elif model_key == 'GCN':
        return GCN(g, in_feats, 16, n_classes, 1, F.relu, 0.5)
    elif model_key == 'GCN-64':
        return GCN(g, in_feats, 64, n_classes, 1, F.relu, 0.5)
    elif model_key == 'GAT':
        # Default args from the paper
        num_heads = 8
        num_out_heads = 8 if dataset == 'pubmed' else 1
        num_layers = 1  # one *hidden* layer
        heads = ([num_heads] * num_layers) + [num_out_heads]
        return GAT(
            g,
            num_layers,
            in_feats,
            8,       # hidden units per layer
            n_classes,
            heads,
            F.elu,   # activation function
            0.6,     # feature dropout
            0.6,     # attention dropout
            0.2,     # negative slope for LeakyReLU
            False    # residual connections (disabled)
        )
    elif model_key == 'GraphSAGE':
        return GraphSAGE(g, in_feats, 16, n_classes, 1, F.relu, 0.5, "mean")
    # Add more models here
    raise ValueError("Invalid model key")
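# A quick check of the head-list arithmetic used in the 'GAT' branch of
# build_model() above: one hidden layer with 8 heads, plus the
# dataset-dependent number of output heads.
num_heads, num_layers = 8, 1
for dataset, num_out_heads in (('pubmed', 8), ('cora', 1)):
    heads = ([num_heads] * num_layers) + [num_out_heads]
    print(dataset, heads)  # pubmed -> [8, 8], cora -> [8, 1]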
def __init__(self, nodeNum, hidden_features, latent_feature):
    super(GATVAE, self).__init__()
    self.nodeNum = nodeNum
    self.gat_in = GAT(nodeNum)
    # encoder: nodeNum -> hidden_features[0] -> ... -> hidden_features[-1] -> latent
    self.encode_net = [nn.Linear(nodeNum, hidden_features[0]), nn.ReLU()]
    for i in range(len(hidden_features) - 1):
        self.encode_net.extend([nn.Linear(hidden_features[i],
                                          hidden_features[i + 1]), nn.ReLU()])
    self.encode_net.extend([nn.Linear(hidden_features[-1], latent_feature),
                            nn.ReLU()])
    self.encode_net = nn.Sequential(*self.encode_net)
    self.mu = nn.Linear(latent_feature, latent_feature)
    self.sigma = nn.Linear(latent_feature, latent_feature)
    # decoder mirrors the encoder: latent -> reversed hidden_features -> nodeNum
    self.decode_net = [nn.Linear(latent_feature, hidden_features[-1]), nn.ReLU()]
    for i in range(len(hidden_features) - 1):
        self.decode_net.extend([
            nn.Linear(hidden_features[len(hidden_features) - i - 1],
                      hidden_features[len(hidden_features) - i - 2]),
            nn.ReLU()])
    self.decode_net.extend([nn.Linear(hidden_features[0], nodeNum), nn.ReLU()])
    self.decode_net = nn.Sequential(*self.decode_net)
    self.gat_out = GAT(nodeNum)
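# Hedged sketch: GATVAE above defines mu/sigma heads but its forward() is not
# shown. A VAE of this shape would normally sample the latent code with the
# reparameterization trick; treating self.sigma's output as a log-variance is
# an assumption.
def reparameterize(mu, log_var):
    std = torch.exp(0.5 * log_var)   # log-variance -> standard deviation
    eps = torch.randn_like(std)      # unit Gaussian noise
    return mu + eps * std            # differentiable sample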
def model_fn():
    return GAT(graph,
               config['class_num'],
               config['features_num'],
               config['num_heads'],
               config['batch_size'],
               val_batch_size=config['val_batch_size'],
               test_batch_size=config['test_batch_size'],
               categorical_attrs_desc=config['categorical_attrs_desc'],
               hidden_dim=config['hidden_dim'],
               in_drop_rate=config['in_drop_rate'],
               attn_drop_rate=config['attn_drop_rate'],
               neighs_num=config['neighs_num'],
               hops_num=config['hops_num'],
               full_graph_mode=config['full_graph_mode'])
def test_GAT():
    ft_sizes = 1433
    num_class = 7
    num_nodes = 2708
    hid_units = [8]
    n_heads = [8, 1]
    activation = nn.ELU()
    residual = False
    input_data = Tensor(np.array(np.random.rand(1, 2708, 1433), dtype=np.float32))
    biases = Tensor(np.array(np.random.rand(1, 2708, 2708), dtype=np.float32))
    net = GAT(ft_sizes,
              num_class,
              num_nodes,
              hidden_units=hid_units,
              num_heads=n_heads,
              attn_drop=0.6,
              ftr_drop=0.6,
              activation=activation,
              residual=residual)
    _executor.compile(net, input_data, biases)
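# Hedged sketch: the dense `biases` tensor fed to the GAT above is random here
# because the test only compiles the network. In the original GAT reference
# code, biases are derived from the adjacency matrix so that non-edges receive
# a large negative value before the attention softmax. This simplified helper
# is an assumption, not this repository's own adj_to_bias.
import numpy as np

def adj_to_bias(adj):
    # adj: (batch, n, n); returns 0.0 where an edge (or self-loop) exists and
    # -1e9 elsewhere, masking non-neighbours out of the softmax
    mask = (adj + np.eye(adj.shape[1])[None, :, :]) > 0
    return -1e9 * (1.0 - mask.astype(np.float32))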
def main(_):
    flags_obj = tf.flags.FLAGS
    euler_graph = tf_euler.dataset.get_dataset(flags_obj.dataset)
    euler_graph.load_graph()
    dims = [flags_obj.hidden_dim] * (flags_obj.layers + 1)
    if flags_obj.run_mode == 'train':
        metapath = [euler_graph.train_edge_type] * flags_obj.layers
    else:
        metapath = [euler_graph.all_edge_type] * flags_obj.layers
    num_steps = int((euler_graph.total_size + 1) // flags_obj.batch_size *
                    flags_obj.num_epochs)
    model = GAT(dims, metapath,
                euler_graph.feature_idx, euler_graph.feature_dim,
                euler_graph.label_idx, euler_graph.label_dim,
                flags_obj.num_heads, flags_obj.concat, flags_obj.improved)
    params = {
        'train_node_type': euler_graph.train_node_type[0],
        'batch_size': flags_obj.batch_size,
        'optimizer': flags_obj.optimizer,
        'learning_rate': flags_obj.learning_rate,
        'log_steps': flags_obj.log_steps,
        'model_dir': flags_obj.model_dir,
        'id_file': euler_graph.id_file,
        'infer_dir': flags_obj.model_dir,
        'total_step': num_steps
    }
    config = tf.estimator.RunConfig(log_step_count_steps=None)
    model_estimator = NodeEstimator(model, params, config)
    if flags_obj.run_mode == 'train':
        model_estimator.train()
    elif flags_obj.run_mode == 'evaluate':
        model_estimator.evaluate()
    elif flags_obj.run_mode == 'infer':
        model_estimator.infer()
    else:
        raise ValueError('Unknown run mode: {}'.format(flags_obj.run_mode))
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
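# Hedged sketch: accuracy() and evaluate() are used by main() above but not
# defined in this snippet; these are plausible implementations in the style of
# the DGL examples.
import torch

def accuracy(logits, labels):
    _, indices = torch.max(logits, dim=1)   # predicted class per node
    correct = torch.sum(indices == labels)
    return correct.item() * 1.0 / len(labels)

def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        return accuracy(logits[mask], labels[mask])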
def lstm(self, inp, weights, bias_in, feature_size, nb_nodes, hid_units,
         n_heads, residual, activation):
    print("Initializing LSTM...")

    def lstm_block(linp, pre_state, kweight, bweight, activation):
        sigmoid = math_ops.sigmoid
        one = constant_op.constant(1, dtype=dtypes.int32)
        c, h = pre_state
        print('[c]', c.shape)        # (128, 128), i.e. (batch, hidden)
        print('[h]', h.shape)        # (128, 128), i.e. (batch, hidden)
        print('[linp]', linp)        # (1, 512),  i.e. (n, f)
        print('[kweight]', kweight)
        # (n, f + hidden) x (f + hidden, 4 * hidden)
        gate_inputs = math_ops.matmul(
            array_ops.concat([linp, h], 1),  # concatenate along the second dimension
            kweight)
        print('[gate_inputs]', gate_inputs)
        gate_inputs = nn_ops.bias_add(gate_inputs, bweight)  # (1, h) + (h,) = (1, h)
        print('[gate_inputs]', gate_inputs)
        # split into input, cell-candidate, forget and output gates
        i, j, f, o = array_ops.split(value=gate_inputs,
                                     num_or_size_splits=4, axis=one)
        forget_bias_tensor = constant_op.constant(1.0, dtype=f.dtype)
        add = math_ops.add
        multiply = math_ops.multiply
        new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))),
                    multiply(sigmoid(i), activation(j)))
        new_h = multiply(activation(new_c), sigmoid(o))
        new_state = [new_c, new_h]
        print('[new_h]', new_h)
        return new_h, new_state

    # transpose with perm=[1, 0, 2, 3] moves the time axis first (e.g. a
    # 2x3x4 tensor becomes 3x2x4 under perm=[1, 0, 2]); unstack then yields a
    # list of seq_length tensors of shape (sample_num, features)
    inp = tf.unstack(tf.transpose(inp, perm=[1, 0, 2, 3]))
    print('[inp]', inp)  # (128, 128)
    state = [tf.zeros([self.update_batch_size, self.dim_lstm_hidden]),
             tf.zeros([self.update_batch_size, self.dim_lstm_hidden])]
    print('[state]', state)
    output = None
    for t in range(self.seq_length):
        mean, var = tf.nn.moments(inp[t], axes=[0])
        epsilon = 0.001
        W = tf.nn.batch_normalization(inp[t], mean, var, 0.0, 1.0, epsilon)
        gat_outputs = GAT.inference(W, weights, bias_in, feature_size, nb_nodes,
                                    self.is_train, self.attn_drop, self.ffd_drop,
                                    bias_in, hid_units, n_heads, activation,
                                    residual)
        # (batch_size, node_num, features)
        lstm_inputs = gat_outputs[:, self.node_num, :]
        # Define an input series and encode it with an LSTM.
        encoder_inputs = lstm_inputs
        encoder_outputs, state_h = lstm_block(lstm_inputs, state,
                                              weights['kernel_lstm'],
                                              weights['b_lstm'], tf.nn.tanh)
        # We discard `encoder_outputs` and only keep the final states. These
        # represent the "context" vector that we use as the basis for decoding.
        encoder_states = state_h
        # Set up the decoder, using `encoder_states` as initial state.
        # This is where teacher forcing inputs are fed in.
        decoder_inputs = lstm_inputs
        # We return full output sequences and internal states. The return
        # states are not used in the training model, but they are used in
        # inference.
        decoder_outputs, _ = lstm_block(lstm_inputs, state,
                                        weights['kernel_lstm'],
                                        weights['b_lstm'], tf.nn.tanh)
        # decoder_dense = Dense(1)  # 1 continuous output at each timestep
        # decoder_outputs = decoder_dense(decoder_outputs, encoder_states)
        # output, state = lstm_block(lstm_inputs, state,
        #                            weights['kernel_lstm'], weights['b_lstm'],
        #                            tf.nn.tanh)
    return decoder_outputs
def train():
    if args.model == 'baseline':
        net = Baseline(in_channels=7, out_channels_1=7, out_channels_2=7,
                       KT_1=4, KT_2=3, num_nodes=39,
                       batch_size=args.batch_size, frames=33, frames_0=12,
                       num_generator=10)
    elif args.model == 'GAT':
        net = GAT()
    elif args.model == 'GAT_edge':
        net = GAT_edge()
    else:
        print('must choose a model in the choices')
        raise ValueError('unknown model: {}'.format(args.model))

    if args.init_type is not None:
        try:
            init_weights(net, init_type=args.init_type)
        except:
            sys.exit('Load Network <==> Init_weights error!')
    # net = nn.DataParallel(net)
    net = net.cuda()

    accuracy = 0
    train_file = 4
    train_amount = 6400  # 8144
    eval_amount = 3200
    num_epoch = train_amount // args.batch_size * train_file
    train_data = trainSet(39, train_amount, [0, 1, 2, 3])
    trainloader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True)
    batch_loader = iter(trainloader)
    eval_data = trainSet(39, eval_amount, 4)
    evalloader = DataLoader(eval_data, batch_size=args.batch_size, shuffle=True)
    eval_iter = iter(evalloader)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    # train ------------------------------------------------
    print('---- epoch start ----')
    start_time = time.time()
    for epoch in range(num_epoch):
        # load train data; restart the iterator when the loader is exhausted
        try:
            Y, infos, labels = next(batch_loader)
        except StopIteration:
            batch_loader = iter(trainloader)
            Y, infos, labels = next(batch_loader)
        Y, infos, labels = Y.float().cuda(), infos.float().cuda(), labels.float().cuda()

        label_predicted = net(Y, infos)
        # loss = MSE_loss(label_predicted, labels.long())
        # criteria = nn.BCELoss()
        loss = MSE_loss(label_predicted, labels.long())
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), max_norm=20, norm_type=2)
        optimizer.step()
        print('epoch:{}/{} | loss:{:.4f}'.format(epoch + 1, num_epoch, loss.item()))
        with open(args.log_folder + 'loss.log', mode='a') as f:
            f.writelines('\n epoch:{}/{} | loss:{:.4f}'.format(
                epoch + 1, num_epoch, loss.item()))

        # eval ------------------------------------------------
        if epoch % 20 == 0:
            net.eval()
            accu, _ = evaluate(model=net, data_iter=eval_iter,
                               data_loader=evalloader, num_epoch=10)
            print('accuracy:{}'.format(accu))
            with open(args.log_folder + 'accu.log', mode='a') as f:
                f.writelines('\n eval epoch:{} | loss:{:.4f}'.format(
                    epoch // 20 + 1, loss.item()))
            if accu > accuracy:
                torch.save(net.state_dict(),
                           args.save_folder + '{}_{}.pth'.format(args.model, accu))
                accuracy = accu

    stop_time = time.time()
    print("program run for {} s".format(stop_time - start_time))
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)

    with tf.device(device):
        features = tf.convert_to_tensor(data.features, dtype=tf.float32)
        labels = tf.convert_to_tensor(data.labels, dtype=tf.int64)
        train_mask = tf.convert_to_tensor(data.train_mask, dtype=tf.bool)
        val_mask = tf.convert_to_tensor(data.val_mask, dtype=tf.bool)
        test_mask = tf.convert_to_tensor(data.test_mask, dtype=tf.bool)
        num_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------
          #Edges %d
          #Classes %d
          #Train samples %d
          #Val samples %d
          #Test samples %d""" %
              (n_edges, n_classes,
               train_mask.numpy().sum(),
               val_mask.numpy().sum(),
               test_mask.numpy().sum()))

        g = data.graph
        # add self loop
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
        n_edges = g.number_of_edges()

        # create model
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        model = GAT(g,
                    args.num_layers,
                    num_feats,
                    args.num_hidden,
                    n_classes,
                    heads,
                    tf.nn.elu,
                    args.in_drop,
                    args.attn_drop,
                    args.negative_slope,
                    args.residual)
        print(model)
        if args.early_stop:
            stopper = EarlyStopping(patience=100)
        # loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
        #     from_logits=False)
        loss_fcn = tf.nn.sparse_softmax_cross_entropy_with_logits

        # use optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr, epsilon=1e-8)

        # initialize graph
        dur = []
        for epoch in range(args.epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_weights)
                logits = model(features, training=True)
                loss_value = tf.reduce_mean(
                    loss_fcn(labels=labels[train_mask],
                             logits=logits[train_mask]))
                # Manual weight decay: we found that TensorFlow's Adam(W)
                # implements weight decay differently from PyTorch, which led
                # to worse results. Adding the weights to the loss by hand
                # solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + \
                        args.weight_decay * tf.nn.l2_loss(weight)

                grads = tape.gradient(loss_value, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            train_acc = accuracy(logits[train_mask], labels[train_mask])

            if args.fastmode:
                val_acc = accuracy(logits[val_mask], labels[val_mask])
            else:
                val_acc = evaluate(model, features, labels, val_mask)
                if args.early_stop:
                    if stopper.step(val_acc, model):
                        break

            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
                  " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                      epoch, np.mean(dur), loss_value.numpy().item(),
                      train_acc, val_acc, n_edges / np.mean(dur) / 1000))

        print()
        if args.early_stop:
            model.load_weights('es_checkpoint.pb')
        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # batch_size = args.batch_size
    # cur_step = 0
    # patience = args.patience
    # best_score = -1
    # best_loss = 10000
    # # define loss function
    # loss_fcn = torch.nn.BCEWithLogitsLoss()

    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    # nxg = valid_dataset.graph.to_networkx().to_undirected()
    # comps = [comp for comp in nx.connected_components(nxg) if len(comp) > 10]
    # print(len(comps))
    # exit()

    # five folds of four graphs each for cross-validation
    cross_valid_list = []
    for i in range(5):
        cross_valid_list.append(list(range(4 * i, 4 * (i + 1))))
    cross_train_dataset = copy.copy(train_dataset)
    valid_precision = []
    valid_recall = []
    valid_scores = []
    test_precision = []
    test_recall = []
    test_scores = []

    for ind, valid_list in enumerate(cross_valid_list):
        batch_size = args.batch_size
        cur_step = 0
        patience = args.patience
        best_score = -1
        best_loss = 10000
        # define loss function
        loss_fcn = torch.nn.BCEWithLogitsLoss()
        train_list = [i for i in range(20) if i not in valid_list]
        print('Train List: {}'.format(train_list))
        print('Valid List: {}'.format(valid_list))
        modify(train_dataset, cross_train_dataset, train_list, mode='train', offset=0)
        modify(valid_dataset, cross_train_dataset, valid_list, mode='valid', offset=16)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                      collate_fn=collate)
        valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size,
                                      collate_fn=collate)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                     collate_fn=collate)
        n_classes = train_dataset.labels.shape[1]
        num_feats = train_dataset.features.shape[1]
        g = train_dataset.graph
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        # define the model
        model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                    args.residual)
        # define the optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
        model = model.to(device)

        for epoch in range(args.epochs):
            model.train()
            loss_list = []
            for batch, data in enumerate(train_dataloader):
                subgraph, feats, labels = data
                feats = feats.to(device)
                labels = labels.to(device)
                model.g = subgraph
                for layer in model.gat_layers:
                    layer.g = subgraph
                logits = model(feats.float())
                loss = loss_fcn(logits, labels.float())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss_list.append(loss.item())
            loss_data = np.array(loss_list).mean()
            print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data),
                  end=' ')
            if epoch % 1 == 0:
                score_list = []
                val_loss_list = []
                for batch, valid_data in enumerate(valid_dataloader):
                    subgraph, feats, labels = valid_data
                    feats = feats.to(device)
                    labels = labels.to(device)
                    prec, recall, score, val_loss = evaluate(
                        feats.float(), model, subgraph, labels.float(), loss_fcn)
                    score_list.append([prec, recall, score])
                    val_loss_list.append(val_loss)
                mean_score = np.array(score_list).mean(axis=0)
                mean_val_loss = np.array(val_loss_list).mean()
                print("| Valid Precision: {:.4f} | Valid Recall: {:.4f} | "
                      "Valid F1-Score: {:.4f} ".format(
                          mean_score[0], mean_score[1], mean_score[2]),
                      end=' ')

                test_score_list = []
                for batch, test_data in enumerate(test_dataloader):
                    subgraph, feats, labels = test_data
                    feats = feats.to(device)
                    labels = labels.to(device)
                    test_prec, test_rec, test_score, _ = evaluate(
                        feats, model, subgraph, labels.float(), loss_fcn)
                    test_score_list.append([test_prec, test_rec, test_score])
                mean_test_score = np.array(test_score_list).mean(axis=0)
                print("| Test Precision: {:.4f} | Test Recall: {:.4f} | "
                      "Test F1-Score: {:.4f}".format(
                          mean_test_score[0], mean_test_score[1],
                          mean_test_score[2]))

                if epoch == args.epochs - 1:
                    valid_precision.append(round(mean_score[0], 4))
                    valid_recall.append(round(mean_score[1], 4))
                    valid_scores.append(round(mean_score[2], 4))
                    test_precision.append(round(mean_test_score[0], 4))
                    test_recall.append(round(mean_test_score[1], 4))
                    test_scores.append(round(mean_test_score[2], 4))

                # early stop
                if mean_score[2] > best_score or best_loss > mean_val_loss:
                    if mean_score[2] > best_score and best_loss > mean_val_loss:
                        val_early_loss = mean_val_loss
                        val_early_score = mean_score[2]
                    best_score = np.max((mean_score[2], best_score))
                    best_loss = np.min((best_loss, mean_val_loss))
                    cur_step = 0
                else:
                    cur_step += 1
                    if cur_step == patience:
                        valid_precision.append(round(mean_score[0], 4))
                        valid_recall.append(round(mean_score[1], 4))
                        valid_scores.append(round(mean_score[2], 4))
                        test_precision.append(round(mean_test_score[0], 4))
                        test_recall.append(round(mean_test_score[1], 4))
                        test_scores.append(round(mean_test_score[2], 4))
                        break

    print('Valid Scores: {}'.format(valid_scores))
    print('Test Scores: {}'.format(test_scores))
    out_matrix = np.stack([valid_precision, valid_recall, valid_scores,
                           test_precision, test_recall, test_scores], axis=1)
    np.savetxt('results.csv', out_matrix, delimiter=',')
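# Hedged sketch: the evaluate() called in the cross-validation loop above
# returns (precision, recall, f1, loss) but is not defined in this snippet.
# A plausible multi-label implementation, reusing the 0.5 threshold seen
# elsewhere in this codebase:
import numpy as np
import torch
from sklearn.metrics import f1_score, precision_score, recall_score

def evaluate(feats, model, subgraph, labels, loss_fcn):
    model.eval()
    with torch.no_grad():
        model.g = subgraph
        for layer in model.gat_layers:
            layer.g = subgraph
        output = model(feats.float())
        loss = loss_fcn(output, labels)
        predict = np.where(output.data.cpu().numpy() >= 0.5, 1, 0)
        y_true = labels.data.cpu().numpy()
    prec = precision_score(y_true, predict, average='micro')
    rec = recall_score(y_true, predict, average='micro')
    f1 = f1_score(y_true, predict, average='micro')
    return prec, rec, f1, loss.item()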
def main(training_file, dev_file, test_file, graph_type=None, net=None,
         epochs=None, patience=None, grid_width=None, image_width=None,
         batch_size=None, num_hidden=None, heads=None, gnn_layers=None,
         cnn_layers=None, nonlinearity=None, residual=None, lr=None,
         weight_decay=None, in_drop=None, alpha=None, attn_drop=None,
         cuda=None, fw='dgl', index=None, previous_model=None):
    global stop_training

    if nonlinearity == 'relu':
        nonlinearity = F.relu
    elif nonlinearity == 'elu':
        nonlinearity = F.elu

    loss_fcn = torch.nn.MSELoss()  # (reduction='sum')

    print('=========================')
    print('HEADS', heads)
    # print('OUT_HEADS', num_out_heads)
    print('GNN LAYERS', gnn_layers)
    print('CNN LAYERS', cnn_layers)
    print('HIDDEN', num_hidden)
    print('RESIDUAL', residual)
    print('inDROP', in_drop)
    print('atDROP', attn_drop)
    print('LR', lr)
    print('DECAY', weight_decay)
    print('ALPHA', alpha)
    print('BATCH', batch_size)
    print('GRAPH_ALT', graph_type)
    print('ARCHITECTURE', net)
    print('=========================')

    # create the dataset
    time_dataset_a = time.time()
    print('Loading training set...')
    train_dataset = socnavImg.SocNavDataset(training_file, mode='train')
    print('Loading dev set...')
    valid_dataset = socnavImg.SocNavDataset(dev_file, mode='valid')
    print('Loading test set...')
    test_dataset = socnavImg.SocNavDataset(test_file, mode='test')
    print('Done loading files')
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=True, collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size,
                                  shuffle=True, collate_fn=collate)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=True, collate_fn=collate)
    time_dataset_b = time.time()
    for _ in range(5):
        print(f'TIME {time_dataset_b - time_dataset_a}')

    num_rels = len(socnavImg.get_relations())
    cur_step = 0
    best_loss = -1
    n_classes = num_hidden[-1]
    print('Number of classes: {}'.format(n_classes))
    num_feats = train_dataset.graphs[0].ndata['h'].shape[1]
    print('Number of features: {}'.format(num_feats))
    g = dgl.batch(train_dataset.graphs)
    # heads = ([num_heads] * gnn_layers) + [num_out_heads]

    # define the model
    if fw == 'dgl':
        if net in ['gat']:
            model = GAT(g,             # graph
                        gnn_layers,    # gnn_layers
                        num_feats,     # in_dimension
                        num_hidden,    # num_hidden
                        1,
                        grid_width,    # grid_width
                        heads,         # head
                        nonlinearity,  # activation
                        in_drop,       # feat_drop
                        attn_drop,     # attn_drop
                        alpha,         # negative_slope
                        residual,      # residual
                        cnn_layers)    # cnn_layers
        elif net in ['gatmc']:
            model = GATMC(g,             # graph
                          gnn_layers,    # gnn_layers
                          num_feats,     # in_dimension
                          num_hidden,    # num_hidden
                          grid_width,    # grid_width
                          image_width,   # image_width
                          heads,         # head
                          nonlinearity,  # activation
                          in_drop,       # feat_drop
                          attn_drop,     # attn_drop
                          alpha,         # negative_slope
                          residual,      # residual
                          cnn_layers)    # cnn_layers
        elif net in ['rgcn']:
            print(f'CREATING RGCN(GRAPH, gnn_layers:{gnn_layers}, '
                  f'cnn_layers:{cnn_layers}, num_feats:{num_feats}, '
                  f'num_hidden:{num_hidden}, grid_with:{grid_width}, '
                  f'image_width:{image_width}, num_rels:{num_rels}, '
                  f'non-linearity:{nonlinearity}, drop:{in_drop}, '
                  f'num_bases:{num_rels})')
            model = RGCN(g, gnn_layers, cnn_layers, num_feats, num_hidden,
                         grid_width, image_width, num_rels, nonlinearity,
                         in_drop, num_bases=num_rels)
        else:
            print('No valid GNN model specified')
            sys.exit(0)

    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    # for name, param in model.named_parameters():
    #     if param.requires_grad:
    #         print(name, param.data.shape)
    if previous_model is not None:
        model.load_state_dict(torch.load(previous_model, map_location=device))
    model = model.to(device)

    for epoch in range(epochs):
        if stop_training:
            print("Stopping training. Please wait.")
            break
        model.train()
        loss_list = []
        for batch, data in enumerate(train_dataloader):
            subgraph, labels = data
            subgraph.set_n_initializer(dgl.init.zero_initializer)
            subgraph.set_e_initializer(dgl.init.zero_initializer)
            feats = subgraph.ndata['h'].to(device)
            labels = labels.to(device)
            if fw == 'dgl':
                model.g = subgraph
                for layer in model.layers:
                    layer.g = subgraph
                if net in ['rgcn']:
                    logits = model(feats.float(),
                                   subgraph.edata['rel_type'].squeeze().to(device))
                else:
                    logits = model(feats.float())
            else:
                print('Only DGL is supported at the moment here.')
                sys.exit(1)
                # (unreachable PyTorch-Geometric branch kept for reference)
                if net in ['pgat', 'pgcn']:
                    data = Data(x=feats.float(),
                                edge_index=torch.stack(subgraph.edges()).to(device))
                else:
                    data = Data(x=feats.float(),
                                edge_index=torch.stack(subgraph.edges()).to(device),
                                edge_type=subgraph.edata['rel_type'].squeeze().to(device))
                logits = model(data, subgraph)
            a = logits  # [getMaskForBatch(subgraph)].flatten()
            # print('AA', a.shape)
            # print(a)
            a = a.flatten()
            # print('labels', labels.shape)
            b = labels.float()
            # print('b')
            # print(b)
            b = b.flatten()
            # print('BB', b.shape)
            ad = a.to(device)
            bd = b.to(device)
            # print(ad.shape, ad.dtype, bd.shape, bd.dtype)
            loss = loss_fcn(ad, bd)
            optimizer.zero_grad()
            a = list(model.parameters())[0].clone()
            loss.backward()
            optimizer.step()
            b = list(model.parameters())[0].clone()
            not_learning = torch.equal(a.data, b.data)
            if not_learning:
                print('Not learning')
                # sys.exit(1)
            else:
                pass
                # print('Diff: ', (a.data - b.data).sum())
            # print(loss.item())
            loss_list.append(loss.item())
        loss_data = np.array(loss_list).mean()
        print('Loss: {}'.format(loss_data))
        if epoch % 5 == 0:
            print("Epoch {:05d} | Loss: {:.4f} | Patience: {} | ".format(
                epoch, loss_data, cur_step), end='')

        score_list = []
        val_loss_list = []
        for batch, valid_data in enumerate(valid_dataloader):
            subgraph, labels = valid_data
            subgraph.set_n_initializer(dgl.init.zero_initializer)
            subgraph.set_e_initializer(dgl.init.zero_initializer)
            feats = subgraph.ndata['h'].to(device)
            labels = labels.to(device)
            score, val_loss = evaluate(feats.float(), model, subgraph,
                                       labels.float(), loss_fcn, fw, net)
            score_list.append(score)
            val_loss_list.append(val_loss)
        mean_score = np.array(score_list).mean()
        mean_val_loss = np.array(val_loss_list).mean()
        if epoch % 5 == 0:
            print("Score: {:.4f} MEAN: {:.4f} BEST: {:.4f}".format(
                mean_score, mean_val_loss, best_loss))

        # early stop
        if best_loss > mean_val_loss or best_loss < 0:
            print('Saving...')
            directory = str(index).zfill(5)
            os.system('mkdir ' + directory)
            best_loss = mean_val_loss
            # Save the model
            torch.save(model.state_dict(), directory + '/SNGNN2D.tch')
            params = {
                'loss': best_loss,
                'net': net,  # str(type(net))
                'fw': fw,
                'gnn_layers': gnn_layers,
                'cnn_layers': cnn_layers,
                'num_feats': num_feats,
                'num_hidden': num_hidden,
                'graph_type': graph_type,
                'n_classes': n_classes,
                'heads': heads,
                'grid_width': grid_width,
                'image_width': image_width,
                'F': F.relu,
                'in_drop': in_drop,
                'attn_drop': attn_drop,
                'alpha': alpha,
                'residual': residual,
                'num_rels': num_rels
            }
            pickle.dump(params, open(directory + '/SNGNN2D.prms', 'wb'))
            cur_step = 0
        else:
            # print(best_loss, mean_val_loss)
            cur_step += 1
            if cur_step >= patience:
                break

    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, labels = test_data
        subgraph.set_n_initializer(dgl.init.zero_initializer)
        subgraph.set_e_initializer(dgl.init.zero_initializer)
        feats = subgraph.ndata['h'].to(device)
        labels = labels.to(device)
        test_score_list.append(evaluate(feats, model, subgraph, labels.float(),
                                        loss_fcn, fw, net)[1])
    print("MSE for the test set {}".format(np.array(test_score_list).mean()))
    model.eval()
    return best_loss
def test(test_iter, test_loader, weights_path, num_epoch, model_type=0,
         threshold=0.7):
    if model_type == 0:
        model = Baseline(in_channels=7, out_channels_1=7, out_channels_2=7,
                         KT_1=4, KT_2=3, num_nodes=39, batch_size=32,
                         frames=33, frames_0=12, num_generator=10)
    elif model_type == 1:
        model = GAT()
    elif model_type == 2:
        model = GAT_edge()
    else:
        raise ValueError('model_type must be 0, 1 or 2')

    model.load_state_dict(torch.load(weights_path))
    # model = nn.DataParallel(model)
    model = model.cuda()
    model.eval()

    accu = 0
    true_labels = np.array([])
    pred_labels = np.array([])
    label_float = np.array([])
    for epoch in range(num_epoch):
        # restart the iterator when the loader is exhausted
        try:
            Y, infos, labels = next(test_iter)
        except StopIteration:
            test_iter = iter(test_loader)
            Y, infos, labels = next(test_iter)
        Y, infos, labels = Y.float().cuda(), infos.float().cuda(), labels.type(torch.int32)

        label_predicted = model(Y, infos)
        label_float = np.concatenate(
            (label_float, label_predicted.cpu().reshape((1, -1))[0]))
        labels_threshold = label_predicted > threshold
        true_labels = np.concatenate((true_labels, labels.reshape((1, -1))[0]))
        pred_labels = np.concatenate(
            (pred_labels, labels_threshold.cpu().reshape((1, -1))[0]))
        # fraction of correct predictions (XOR counts the mismatches)
        all_right = 1 - torch.mean(
            (labels ^ labels_threshold.cpu()).type(torch.float32))
        print('epoch:{}, accu:{}'.format(epoch, all_right))
        accu += all_right
    accu /= num_epoch

    plot(confusion_matrix(true_labels, pred_labels))
    plt.figure(figsize=(20, 8), dpi=100)
    distance = 0.1
    group_num = int((max(label_float) - min(label_float)) / distance)
    plt.hist(label_float, bins=group_num)
    # plt.xticks(range(min(label_float), max(label_float))[::2])
    plt.grid(linestyle="--", alpha=0.5)
    plt.xlabel("label output")
    plt.ylabel("frequency")
    plt.savefig('./data/frequency.png')
    return accu
def main(args):
    # load and preprocess dataset
    g, graph_labels = load_graphs('/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    features = torch.FloatTensor(data['feat'])
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    num_feats = features.shape[1]
    n_classes = 47
    n_edges = g.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # add self loop
    g = add_self_loop(g)
    # g.remove_edges_from(nx.selfloop_edges(g))
    # g = DGLGraph(g)
    # g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    start = time.time()
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
    print(f"Time Consuming {np.sum(dur)}, Overall time {time.time() - start}")
def train(args):
    # load and preprocess dataset
    # data = load_data(args)
    # data = CoraFull()
    # data = Coauthor('cs')

    # FIRST, CHECK DATASET
    path = './dataset/' + str(args.dataset) + '/'
    '''
    edges = np.loadtxt(path + 'edges.txt')
    edges = edges.astype(int)
    features = np.loadtxt(path + 'features.txt')
    train_mask = np.loadtxt(path + 'train_mask.txt')
    train_mask = train_mask.astype(int)
    labels = np.loadtxt(path + 'labels.txt')
    labels = labels.astype(int)
    '''
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    n_classes = max(labels) - min(labels) + 1
    assert train_mask.shape[0] == num_nodes
    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)
    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    u = edges[:, 0]
    v = edges[:, 1]
    # initialize a DGL graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)
    # add self loop
    if isinstance(g, nx.classes.digraph.DiGraph):
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    elif isinstance(g, DGLGraph):
        g = transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    record_time = 0
    avg_run_time = 0
    Used_memory = 0
    for epoch in range(args.num_epochs):
        # print('epoch = ', epoch)
        # print('mem0 = {}'.format(mem0))
        torch.cuda.synchronize()
        tf = time.time()
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0)
        print('now_mem : ', now_mem)
        Used_memory = max(now_mem, Used_memory)
        tf1 = time.time()

        optimizer.zero_grad()
        torch.cuda.synchronize()
        t1 = time.time()
        loss.backward()
        torch.cuda.synchronize()
        optimizer.step()
        t2 = time.time()
        run_time_this_epoch = t2 - tf

        if epoch >= 3:
            dur.append(time.time() - t0)
            record_time += 1
            avg_run_time += run_time_this_epoch

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        # log for each step
        print('Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
              .format(epoch, run_time_this_epoch, train_acc,
                      (now_mem * 1.0 / (1024 ** 2))))
        '''
        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                     n_edges / np.mean(dur) / 1000))
        '''

    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))

    # OUTPUT we need
    avg_run_time = avg_run_time * 1. / record_time
    Used_memory /= (1024 ** 3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, avg_run_time))
def train(args):
    data_dir = args.data_dir
    edge_dir = args.edge_dir
    gpu = args.gpu
    node_f_dim = 23
    edge_f_dim = 19
    batch_size = args.batch_size
    num_classes = args.num_classes
    num_hidden = args.num_hidden
    num_heads = args.num_heads
    num_out_heads = args.num_out_heads
    num_layers = args.num_layers
    residual = args.residual
    in_drop = args.in_drop
    attn_drop = args.attn_drop
    optim_type = args.optim_type
    momentum = args.momentum
    lr = args.lr
    patience = args.patience
    weight_decay = args.weight_decay
    alpha = args.alpha
    epochs = args.epochs
    smooth_eps = args.smooth_eps
    temperature = args.temperature
    edge_feature_attn = args.edge_feature_attn

    if gpu >= 0:
        device = th.device("cuda")
    else:
        device = th.device("cpu")

    trainset = StrokeDataset(data_dir, edge_dir, "train", num_classes)
    validset = StrokeDataset(data_dir, edge_dir, "valid", num_classes)
    testset = StrokeDataset(data_dir, edge_dir, "test", num_classes)
    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True,
                              collate_fn=collate(device))
    valid_loader = DataLoader(validset, batch_size=64, shuffle=False,
                              collate_fn=collate(device))
    test_loader = DataLoader(testset, batch_size=64, shuffle=False,
                             collate_fn=collate(device))

    heads = ([num_heads] * num_layers) + [num_out_heads]
    model = GAT(num_layers, node_f_dim, edge_f_dim, num_hidden, num_classes,
                heads, nn.LeakyReLU(alpha), in_drop, attn_drop, alpha,
                temperature, edge_feature_attn, residual).to(device)
    # loss_func = nn.CrossEntropyLoss()
    loss_func = CrossEntropyLoss(smooth_eps=smooth_eps)

    if optim_type == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optim_type == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max',
                                                     patience=patience)

    epoch_losses = []
    best_valid_acc = 0
    best_test_acc = 0
    best_round = 0
    start = time.time()
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_start = time.time()
        for it, (fg, lg) in enumerate(train_loader):
            logits = model(fg)
            labels = lg.ndata['y']
            loss = loss_func(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
        epoch_loss /= (it + 1)
        epoch_duration = time.time() - epoch_start
        print('Epoch: {:3d}, loss: {:4f}, speed: {:.2f}doc/s'.format(
            epoch, epoch_loss, len(trainset) / epoch_duration))
        epoch_losses.append(epoch_loss)
        train_acc, _ = evaluate(model, train_loader, num_classes, "train")
        valid_acc, _ = evaluate(model, valid_loader, num_classes, "valid")
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            test_acc, test_conf_mat = evaluate(model, test_loader,
                                               num_classes, "test")
            best_conf_mat = test_conf_mat
            best_round = epoch
        scheduler.step(valid_acc)
        cur_learning_rate = optimizer.param_groups[0]['lr']
        print('Learning rate: {:10f}'.format(cur_learning_rate))
        epoch_duration = time.time() - epoch_start
        if cur_learning_rate <= 1e-6:
            break

    print("Best round: %d" % best_round)
    print_result(best_conf_mat)
    duration = time.time() - start
    print("Time cost: {:.4f}s".format(duration))
    return test_acc
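# Hedged sketch: collate(device) above is assumed to return a function that
# batches (feature_graph, label_graph) pairs with dgl.batch and moves them to
# the target device; the real implementation is not shown in this snippet.
import dgl

def collate(device):
    def batcher(samples):
        feature_graphs, label_graphs = map(list, zip(*samples))
        fg = dgl.batch(feature_graphs).to(device)
        lg = dgl.batch(label_graphs).to(device)
        return fg, lg
    return batcher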
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    writer = SummaryWriter()
    batch_size = args.batch_size
    # cur_step = 0
    # patience = args.patience
    # best_score = -1
    # best_loss = 10000

    # define loss function
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                  collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size,
                                  collate_fn=collate)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                 collate_fn=collate)
    n_classes = train_dataset.labels.shape[1]
    num_feats = train_dataset.features.shape[1]
    g = train_dataset.graph
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.bias, args.residual, args.l0)
    print(model)
    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    model = model.to(device)

    best_epoch = 0
    dur = []
    acc = []
    for epoch in range(args.epochs):
        num = 0
        model.train()
        if epoch % 5 == 0:
            t0 = time.time()
        loss_list = []
        for batch, data in enumerate(train_dataloader):
            subgraph, feats, labels = data
            feats = feats.to(device)
            labels = labels.to(device)
            model.g = subgraph
            for layer in model.gat_layers:
                layer.g = subgraph
            logits = model(feats.float())
            loss = loss_fcn(logits, labels.float())
            # L0 regularisation term produced by the first GAT layer
            loss_l0 = args.loss_l0 * (model.gat_layers[0].loss)
            optimizer.zero_grad()
            (loss + loss_l0).backward()
            optimizer.step()
            loss_list.append(loss.item())
            num += model.gat_layers[0].num
        if epoch % 5 == 0:
            dur.append(time.time() - t0)
        loss_data = np.array(loss_list).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))
        writer.add_scalar('edge_num/0', num, epoch)
        if epoch % 5 == 0:
            score_list = []
            val_loss_list = []
            for batch, valid_data in enumerate(valid_dataloader):
                subgraph, feats, labels = valid_data
                feats = feats.to(device)
                labels = labels.to(device)
                score, val_loss = evaluate(feats.float(), model, subgraph,
                                           labels.float(), loss_fcn)
                score_list.append(score)
                val_loss_list.append(val_loss)
            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            print("val F1-Score: {:.4f} ".format(mean_score))
            writer.add_scalar('loss', mean_val_loss, epoch)
            writer.add_scalar('f1/test_f1_mic', mean_score, epoch)
            acc.append(mean_score)
            # # early stop
            # if mean_score > best_score or best_loss > mean_val_loss:
            #     if mean_score > best_score and best_loss > mean_val_loss:
            #         val_early_loss = mean_val_loss
            #         val_early_score = mean_score
            #         torch.save(model.state_dict(), '{}.pkl'.format('save_rand'))
            #         best_epoch = epoch
            #     best_score = np.max((mean_score, best_score))
            #     best_loss = np.min((best_loss, mean_val_loss))
            #     cur_step = 0
            # else:
            #     cur_step += 1
            #     if cur_step == patience:
            #         break

    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, feats, labels = test_data
        feats = feats.to(device)
        labels = labels.to(device)
        test_score_list.append(
            evaluate(feats, model, subgraph, labels.float(), loss_fcn)[0])
    acc = np.array(test_score_list).mean()
    print("test F1-Score: {:.4f}".format(acc))
    writer.close()
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    mask = mx.nd.array(np.where(data.train_mask == 1))
    test_mask = mx.nd.array(np.where(data.test_mask == 1))
    val_mask = mx.nd.array(np.where(data.val_mask == 1))
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    if args.gpu < 0:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    mask = mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)

    # create graph
    g = data.graph
    # add self-loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                in_feats,
                args.num_hidden,
                n_classes,
                heads,
                elu,
                args.in_drop,
                args.attn_drop,
                args.alpha,
                args.residual)
    stopper = EarlyStopping(patience=100)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})

    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(),
                                               labels[mask].squeeze())
        loss.backward()
        trainer.step(mask.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)
        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}"
              .format(epoch, loss.asnumpy()[0], np.mean(dur),
                      n_edges / np.mean(dur) / 1000))
        val_accuracy = evaluate(model, features, labels, val_mask)
        print("Validation Accuracy {:.4f}".format(val_accuracy))
        if stopper.step(val_accuracy, model):
            break

    model.load_parameters('model.param')
    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    batch_size = args.batch_size
    cur_step = 0
    patience = args.patience
    best_score = -1
    best_loss = 10000
    # define loss function
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    # create the dataset
    # train_dataset = amino_acid_dataset.LegacyAcidDataset(mode='train')
    # valid_dataset = amino_acid_dataset.LegacyAcidDataset(mode='valid')
    test_dataset = amino_acid_dataset_test.LegacyAcidDataset(mode='test')
    # train_dataloader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate)
    # valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                 collate_fn=collate)
    n_classes = test_dataset.labels.shape[1]
    num_feats = test_dataset.features.shape[1]
    g = test_dataset.graph
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)
    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    model = model.to(device)

    # (training loop kept commented out; this script only runs inference)
    # for epoch in range(args.epochs):
    #     model.train()
    #     loss_list = []
    #     for batch, data in enumerate(train_dataloader):
    #         subgraph, feats, labels = data
    #         # print(feats)
    #         feats = feats.to(device)
    #         labels = labels.to(device)
    #         model.g = subgraph
    #         for layer in model.gat_layers:
    #             layer.g = subgraph
    #         logits = model(feats.float())
    #         # a = feats.float()
    #         # print(a)
    #         loss = loss_fcn(logits, labels.float())
    #         optimizer.zero_grad()
    #         loss.backward()
    #         optimizer.step()
    #         loss_list.append(loss.item())
    #     loss_data = np.array(loss_list).mean()
    #     print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))
    #     if epoch % 5 == 0:
    #         score_list = []
    #         val_loss_list = []
    #         for batch, valid_data in enumerate(valid_dataloader):
    #             subgraph, feats, labels = valid_data
    #             feats = feats.to(device)
    #             labels = labels.to(device)
    #             score, val_loss = evaluate(feats.float(), model, subgraph, labels.float(), loss_fcn)
    #             score_list.append(score)
    #             val_loss_list.append(val_loss)
    #         mean_score = np.array(score_list).mean()
    #         mean_val_loss = np.array(val_loss_list).mean()
    #         print("F1-Score: {:.4f} ".format(mean_score))
    #         # early stop
    #         if mean_score > best_score or best_loss > mean_val_loss:
    #             if mean_score > best_score and best_loss > mean_val_loss:
    #                 val_early_loss = mean_val_loss
    #                 val_early_score = mean_score
    #             best_score = np.max((mean_score, best_score))
    #             best_loss = np.min((best_loss, mean_val_loss))
    #             cur_step = 0
    #         else:
    #             cur_step += 1
    #             if cur_step == patience:
    #                 break

    # load model
    model.load_state_dict(torch.load("/home/a503tongxueheng/DGL_GAT/ppi.pt"))
    model.eval()
    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, feats, labels = test_data
        feats = feats.to(device)
        labels = labels.to(device)
        test_score_list.append(
            evaluate(feats, model, subgraph, labels.float(), loss_fcn)[0])
    print("F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    batch_size = args.batch_size
    cur_step = 0
    patience = args.patience
    best_score = -1
    best_loss = 10000
    # define loss function
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                  collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size,
                                  collate_fn=collate)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                 collate_fn=collate)
    n_classes = train_dataset.labels.shape[1]
    num_feats = train_dataset.features.shape[1]
    g = train_dataset.graph
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)
    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    model = model.to(device)

    for epoch in range(args.epochs):
        model.train()
        loss_list = []
        for batch, data in enumerate(train_dataloader):
            subgraph, feats, labels = data
            feats = feats.to(device)
            labels = labels.to(device)
            model.g = subgraph
            for layer in model.gat_layers:
                layer.g = subgraph
            logits = model(feats.float())
            loss = loss_fcn(logits, labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        loss_data = np.array(loss_list).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))
        if epoch % 5 == 0:
            score_list = []
            val_loss_list = []
            for batch, valid_data in enumerate(valid_dataloader):
                subgraph, feats, labels = valid_data
                feats = feats.to(device)
                labels = labels.to(device)
                score, val_loss = evaluate(feats.float(), model, subgraph,
                                           labels.float(), loss_fcn)
                score_list.append(score)
                val_loss_list.append(val_loss)
            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            print("F1-Score: {:.4f} ".format(mean_score))
            # early stop
            if mean_score > best_score or best_loss > mean_val_loss:
                if mean_score > best_score and best_loss > mean_val_loss:
                    val_early_loss = mean_val_loss
                    val_early_score = mean_score
                best_score = np.max((mean_score, best_score))
                best_loss = np.min((best_loss, mean_val_loss))
                cur_step = 0
            else:
                cur_step += 1
                if cur_step == patience:
                    break

    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, feats, labels = test_data
        feats = feats.to(device)
        labels = labels.to(device)
        test_score_list.append(
            evaluate(feats, model, subgraph, labels.float(), loss_fcn)[0])
    print("F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
    # (fragment: tail of an evaluate() helper, followed by module-level training code)
    loss_data = loss_fcn(output, labels[mask].float())
    predict = np.where(output.data.cpu().numpy() >= 0.5, 1, 0)
    score = f1_score(labels[mask].data.cpu().numpy(), predict, average='micro')
    print("F1-score: {:.4f} ".format(score))
    return score, loss_data.item()

best_score = -1
best_loss = 10000
best_model = None
best_loss_curve = []
val_early_loss = 10000
val_early_score = -1

model = GAT(g, num_feats, 256, n_classes, [4, 4, 6], F.elu, 0.0001, 0.0001,
            0.2, True)
loss_fcn = torch.nn.BCEWithLogitsLoss()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
model = model.to(device)

save_loss = []
for epoch in range(200):
    model.train()
    loss_list = []
    for train_batch in batch_list:
        model.g = g.subgraph(train_batch)
        for layer in model.gat_layers:
            layer.g = g.subgraph(train_batch)
        input_feature = features[train_batch]
def main(args):
    # load and preprocess dataset
    if args.dataset == 'reddit':
        data = RedditDataset()
    elif args.dataset in ['photo', "computer"]:
        data = MsDataset(args)
    else:
        data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    current_time = time.strftime('%d_%H:%M:%S', localtime())
    writer = SummaryWriter(log_dir='runs/' + current_time + '_' + args.sess,
                           flush_secs=30)
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.bool().cuda()
        val_mask = val_mask.bool().cuda()
        test_mask = test_mask.bool().cuda()

    g = data.graph
    # add self loop
    if args.dataset != 'reddit':
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    print('edge number %d' % (n_edges))

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.idrop,
                args.adrop,
                args.alpha,
                args.bias,
                args.residual,
                args.l0)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=150)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    dur = []
    time_used = 0
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        loss_l0 = args.loss_l0 * (model.gat_layers[0].loss)

        optimizer.zero_grad()
        (loss + loss_l0).backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        writer.add_scalar('edge_num/0', model.gat_layers[0].num, epoch)

        if args.fastmode:
            val_acc, loss = accuracy(logits[val_mask], labels[val_mask], loss_fcn)
        else:
            val_acc, _ = evaluate(model, features, labels, val_mask, loss_fcn)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))
        writer.add_scalar('loss', loss.item(), epoch)
        writer.add_scalar('f1/train_f1_mic', train_acc, epoch)
        writer.add_scalar('f1/test_f1_mic', val_acc, epoch)
        writer.add_scalar('time/time', time_used, epoch)

    writer.close()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc, _ = evaluate(model, features, labels, test_mask, loss_fcn)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.FloatTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = data.graph
    n_feats = features.shape[1]
    n_labels = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------
      #Features %d
      #Edges %d
      #Labels %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_feats, n_edges, n_labels,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    dataset_train = CampusDataset(features, labels)
    dict_users = iid_users(dataset_train, args.n_users)

    if args.gnnbase == 'gcn':
        g = DGLGraph(g)
        n_edges = g.number_of_edges()
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0
        g.ndata['norm'] = norm.unsqueeze(1)
        model = GCN(g, n_feats, args.n_hidden, n_labels, args.n_layers,
                    F.relu, args.dropout)
    if args.gnnbase == 'gat':
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
        n_edges = g.number_of_edges()
        heads = ([args.n_heads] * args.n_layers) + [args.n_out_heads]
        model = GAT(g, args.n_layers, n_feats, args.n_hidden, n_labels, heads,
                    F.elu, args.in_drop, args.attn_drop, args.negative_slope,
                    args.residual)
    if args.gnnbase == 'sage':
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        n_edges = g.number_of_edges()
        model = GraphSAGE(g, n_feats, args.n_hidden, n_labels, args.n_layers,
                          F.relu, args.dropout, args.aggregator_type)
    print(model)

    model.train()
    w_glob = model.state_dict()
    loss_train = []
    timecost = []
    for epoch in range(args.n_epochs):
        time_begin = time.time()
        w_locals, loss_locals = [], []
        m = max(int(args.frac * args.n_users), 1)
        idxs_users = np.random.choice(range(args.n_users), m, replace=False)
        for idx in idxs_users:
            local = LocalUpdate(args=args, dataset=dataset_train,
                                idxs=dict_users[idx], mask=train_mask)
            w, loss = local.train(model=copy.deepcopy(model))
            w_locals.append(copy.deepcopy(w))
            loss_locals.append(copy.deepcopy(loss))
        w_glob = FedAvg(w_locals)
        model.load_state_dict(w_glob)
        time_end = time.time()
        timecost.append(time_end - time_begin)
        loss_avg = sum(loss_locals) / len(loss_locals)
        print('Epoch {:3d}, Average loss {:.3f}'.format(epoch, loss_avg))
        loss_train.append(loss_avg)
        train_errX, train_errY = eval_error(model, features, labels, train_mask)
        val_errX, val_errY = eval_error(model, features, labels, val_mask)
        test_errX, test_errY = eval_error(model, features, labels, test_mask)
        print("Epoch {:3d} | TrainRMSEX {:.4f} | TrainRMSEY {:.4f} | "
              "ValRMSEX {:.4f} | ValRMSEY {:.4f} | TestRMSEX {:.4f} | "
              "TestRMSEY {:.4f}".format(epoch, train_errX, train_errY,
                                        val_errX, val_errY, test_errX,
                                        test_errY))

    print("Time cost {:.4f}".format(sum(timecost) / args.n_epochs))
    base_errX, base_errY = calc_error(features[test_mask, :2], labels[test_mask])
    print("TestRMSEX-Base {:.4f} | TestRMSEY-Base {:.4f}".format(
        base_errX, base_errY))
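# Hedged sketch: FedAvg() above is assumed to perform standard federated
# averaging, i.e. a parameter-wise mean over the clients' state_dicts; the
# real helper is not shown in this snippet.
import copy
import torch

def FedAvg(w_locals):
    w_avg = copy.deepcopy(w_locals[0])
    for key in w_avg.keys():
        for w in w_locals[1:]:
            w_avg[key] += w[key]
        w_avg[key] = torch.div(w_avg[key], len(w_locals))
    return w_avg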
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BoolTensor masks avoid the deprecated ByteTensor indexing
    train_mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if stopper.step(val_acc, model):
                break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
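# `EarlyStopping` is referenced but never defined in these scripts. A minimal
# sketch that matches how they use it (a higher validation score is better,
# and the best weights are saved to the 'es_checkpoint.pt' path reloaded
# above); the internals are an assumption.
import torch

class EarlyStopping:
    def __init__(self, patience=100):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, score, model):
        # returns True once the score has not improved for `patience` epochs
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            torch.save(model.state_dict(), 'es_checkpoint.pt')
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop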
def main(args):
    # load and preprocess dataset
    (g, features, labels, n_classes, train_mask, val_mask, test_mask,
     lp_dict, ind_features, ind_labels) = load_reg_data(args)
    num_feats = features.shape[1]
    n_edges = g.number_of_edges()
    print("""----Data statistics------
      #use cuda: %d
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (args.gpu, n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        ind_features = ind_features.cuda()
        labels = labels.cuda()
        ind_labels = ind_labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual, args.bias)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    # the original snippet never defines loss_fcn; MSE is assumed here, since
    # the script trains a regression model and reports R2
    loss_fcn = torch.nn.MSELoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        pred = model(features)
        loss = loss_fcn(pred[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)

        train_r2 = compute_r2(pred[train_mask], labels[train_mask])
        if args.fastmode:
            val_r2 = compute_r2(pred[val_mask], labels[val_mask])
        else:
            val_r2 = evaluate(model, features, labels, val_mask)
        if args.early_stop:
            if stopper.step(val_r2, model):
                break
        if epoch > 3:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainR2 {:.4f} |"
                  " ValR2 {:.4f} | ETputs(KTEPS) {:.2f}".format(
                      epoch, np.mean(dur), loss.item(), train_r2, val_r2,
                      n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    # evaluate on the in-distribution and the later (inductive) year
    evaluate_test(model, features, labels, test_mask, lp_dict, meta="2012")
    evaluate_test(model, ind_features, ind_labels, test_mask, lp_dict, meta="2016")
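# `compute_r2` is not defined in this snippet either. A plausible sketch of
# the coefficient of determination it appears to report; reducing over all
# output elements at once is an assumption.
import torch

def compute_r2(pred, target):
    # R^2 = 1 - SS_res / SS_tot
    ss_res = torch.sum((target - pred) ** 2)
    ss_tot = torch.sum((target - target.mean()) ** 2)
    return (1.0 - ss_res / ss_tot).item()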
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)
        g = g.to(ctx)

    features = g.ndata['feat']
    labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
    # convert boolean masks to integer node indices for NDArray indexing
    mask = g.ndata['train_mask']
    mask = mx.nd.array(np.nonzero(mask.asnumpy())[0], ctx=ctx)
    val_mask = g.ndata['val_mask']
    val_mask = mx.nd.array(np.nonzero(val_mask.asnumpy())[0], ctx=ctx)
    test_mask = g.ndata['test_mask']
    test_mask = mx.nd.array(np.nonzero(test_mask.asnumpy())[0], ctx=ctx)

    in_feats = features.shape[1]
    n_classes = data.num_labels
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, in_feats, args.num_hidden, n_classes,
                heads, elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)

    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})

    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(),
                                               labels[mask].squeeze())
        loss.backward()
        trainer.step(mask.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)
        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
            epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000))

        val_accuracy = evaluate(model, features, labels, val_mask)
        print("Validation Accuracy {:.4f}".format(val_accuracy))
        if args.early_stop:
            if stopper.step(val_accuracy, model):
                break
    print()

    if args.early_stop:
        model.load_parameters('model.param')
    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
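# The MXNet script relies on an `evaluate` helper that is not shown. A minimal
# sketch, assuming `mask` holds the integer node indices prepared with
# np.nonzero above; the exact reduction is an assumption.
import mxnet as mx

def evaluate(model, features, labels, mask):
    # argmax accuracy over the masked node indices
    logits = model(features)
    pred = logits[mask].argmax(axis=1)
    return (pred == labels[mask].squeeze()).mean().asscalar()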
import scipy.sparse as sp
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.optimizers import Adam

from utils import plot_embeddings, load_data_v1

if __name__ == "__main__":
    # Read data
    FEATURE_LESS = False

    A, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_v1('cora')

    # add self-connections and row-normalize the features
    A = A + sp.eye(A.shape[0])
    features /= features.sum(axis=1).reshape(-1, 1)

    model = GAT(adj_dim=A.shape[0],
                feature_dim=features.shape[1],
                num_class=y_train.shape[1],
                num_layers=2,
                n_attn_heads=8,
                att_embedding_size=8,
                dropout_rate=0.6,
                l2_reg=2.5e-4,
                use_bias=True)
    optimizer = Adam(lr=0.005)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  weighted_metrics=['categorical_crossentropy', 'acc'])

    model_input = [features, A.toarray()]
    val_data = (model_input, y_val, val_mask)
    mc_callback = ModelCheckpoint('./best_model.h5',
                                  monitor='val_weighted_categorical_crossentropy',
                                  save_best_only=True,
                                  save_weights_only=True)
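    # The snippet stops after building the checkpoint callback and never starts
    # training. A sketch of how the run would plausibly continue; the epoch
    # count and the full-batch batch_size are assumptions, not values from the
    # original script.
    print('start training')
    model.fit(model_input, y_train,
              sample_weight=train_mask,
              validation_data=val_data,
              batch_size=A.shape[0],  # full-batch training, as usual for GAT
              epochs=200,
              shuffle=False,
              verbose=2,
              callbacks=[mc_callback])

    # reload the best checkpoint and report test performance
    model.load_weights('./best_model.h5')
    eval_results = model.evaluate(model_input, y_test,
                                  sample_weight=test_mask,
                                  batch_size=A.shape[0])
    print('test loss: {:.4f}, test acc: {:.4f}'.format(eval_results[0],
                                                       eval_results[-1]))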