def sgcn(feat_data, labels, lap_matrix, train_nodes, valid_nodes, test_nodes,
         args, device, calculate_grad_vars=False, full_batch=False):
    # sample training data with a multiprocessing pool
    process_ids = np.arange(args.batch_num)
    pool = mp.Pool(args.pool_num)
    lap_matrix_sq = lap_matrix.multiply(lap_matrix)
    jobs = prepare_data(pool, sampler, process_ids, train_nodes, samp_num_list,
                        len(feat_data), lap_matrix, lap_matrix_sq, args.n_layers)

    susage = GCN(nfeat=feat_data.shape[1], nhid=args.nhid,
                 num_classes=num_classes, layers=args.n_layers,
                 dropout=args.dropout).to(device)
    print(susage)

    adjs_full, input_nodes_full, sampled_nodes_full = full_batch_sampler(
        train_nodes, len(feat_data), lap_matrix, args.n_layers)
    adjs_full = package_mxl(adjs_full, device)

    optimizer = optim.SGD(filter(lambda p: p.requires_grad, susage.parameters()),
                          lr=args.lr)

    loss_train = []
    loss_test = []
    grad_variance_all = []
    loss_train_all = []
    best_model = copy.deepcopy(susage)
    best_val_loss = 10  # any sufficiently large initial value works
    best_val_index = 0
    best_val_cnt = 0

    for epoch in np.arange(args.epoch_num):
        # fetch the training data sampled for this epoch
        train_data = [job.get() for job in jobs]
        pool.close()
        pool.join()
        # prepare the next epoch's training data
        pool = mp.Pool(args.pool_num)
        jobs = prepare_data(pool, sampler, process_ids, train_nodes, samp_num_list,
                            len(feat_data), lap_matrix, lap_matrix_sq, args.n_layers)

        inner_loop_num = args.batch_num
        # full-batch GD can also be run by ignoring the sampled subgraphs
        if full_batch:
            cur_train_loss, cur_train_loss_all, grad_variance = full_step(
                susage, optimizer, feat_data, labels, train_nodes, valid_nodes,
                adjs_full, train_data, inner_loop_num, device,
                calculate_grad_vars=calculate_grad_vars)
        else:
            cur_train_loss, cur_train_loss_all, grad_variance = sgd_step(
                susage, optimizer, feat_data, labels, train_nodes, valid_nodes,
                adjs_full, train_data, inner_loop_num, device,
                calculate_grad_vars=calculate_grad_vars)
        loss_train_all.extend(cur_train_loss_all)
        grad_variance_all.extend(grad_variance)

        # calculate validation loss (also reported as the test loss below)
        susage.eval()
        susage.zero_grad()
        val_loss, _ = susage.calculate_loss_grad(feat_data, adjs_full, labels,
                                                 valid_nodes)
        if val_loss + 5e-4 < best_val_loss:
            best_val_loss = val_loss
            del best_model
            best_model = copy.deepcopy(susage)
            best_val_index = epoch
            best_val_cnt = 0
        cur_test_loss = val_loss
        best_val_cnt += 1

        loss_train.append(cur_train_loss)
        loss_test.append(cur_test_loss)

        # print progress
        print('Epoch: ', epoch,
              '| train loss: %.8f' % cur_train_loss,
              '| test loss: %.8f' % cur_test_loss)
        if best_val_cnt > 10:  # early stopping: 10 epochs without improvement
            break

    f1_score_test = best_model.calculate_f1(feat_data, adjs_full, labels,
                                            test_nodes)
    return (best_model, loss_train, loss_test, loss_train_all, f1_score_test,
            grad_variance_all, best_val_index)
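# Hypothetical invocation sketch for sgcn(); the Namespace fields mirror the
# attributes read from `args` above, and all values (plus the pre-loaded
# feat_data/labels/lap_matrix and node splits) are illustrative assumptions.
from types import SimpleNamespace

args = SimpleNamespace(batch_num=20, pool_num=4, n_layers=2, nhid=256,
                       dropout=0.1, lr=0.01, epoch_num=100)
best_model, loss_train, loss_test, loss_train_all, f1_test, grad_var, best_epoch = \
    sgcn(feat_data, labels, lap_matrix, train_nodes, valid_nodes, test_nodes,
         args, device, calculate_grad_vars=False, full_batch=False)
print('test F1: %.4f (best epoch: %d)' % (f1_test, best_epoch))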
def sgcn_plus_v2(feat_data, labels, lap_matrix, train_nodes, valid_nodes,
                 test_nodes, args, device, calculate_grad_vars=False):
    # sample training data with a multiprocessing pool
    process_ids = np.arange(args.batch_num)
    pool = mp.Pool(args.pool_num)
    lap_matrix_sq = lap_matrix.multiply(lap_matrix)
    jobs = prepare_data(pool, sampler, process_ids, train_nodes, samp_num_list,
                        len(feat_data), lap_matrix, lap_matrix_sq, args.n_layers)

    susage = GCN(nfeat=feat_data.shape[1], nhid=args.nhid,
                 num_classes=num_classes, layers=args.n_layers,
                 dropout=args.dropout).to(device)
    print(susage)

    adjs_full, input_nodes_full, sampled_nodes_full = full_batch_sampler(
        train_nodes, len(feat_data), lap_matrix, args.n_layers)
    adjs_full = package_mxl(adjs_full, device)

    # forward wrapper with momentum; only needed for SGCN++
    forward_wrapper = ForwardWrapperMomentum(len(feat_data), args.nhid,
                                             args.n_layers, num_classes)

    # note: the learning rate is hard-coded here rather than taken from args.lr
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, susage.parameters()),
                          lr=0.1)

    loss_train = []
    loss_test = []
    grad_variance_all = []
    loss_train_all = []
    best_model = copy.deepcopy(susage)
    best_val_loss = 10  # any sufficiently large initial value works
    best_val_index = 0
    best_val_cnt = 0

    for epoch in np.arange(args.epoch_num):
        # fetch the training data sampled for this epoch
        train_data = [job.get() for job in jobs]
        pool.close()
        pool.join()
        # prepare the next epoch's training data
        pool = mp.Pool(args.pool_num)
        jobs = prepare_data(pool, sampler, process_ids, train_nodes, samp_num_list,
                            len(feat_data), lap_matrix, lap_matrix_sq, args.n_layers)

        inner_loop_num = args.batch_num
        # compared with sgcn_plus, the only difference is the use of
        # multi_level_momentum_step here
        cur_train_loss, cur_train_loss_all, grad_variance = multi_level_momentum_step(
            susage, optimizer, feat_data, labels, train_nodes, valid_nodes,
            adjs_full, sampled_nodes_full, train_data, inner_loop_num,
            forward_wrapper, device, calculate_grad_vars=calculate_grad_vars)
        loss_train_all.extend(cur_train_loss_all)
        grad_variance_all.extend(grad_variance)

        # calculate validation loss
        susage.eval()
        susage.zero_grad()
        val_loss, _ = susage.calculate_loss_grad(feat_data, adjs_full, labels,
                                                 valid_nodes)
        if val_loss + 0.01 < best_val_loss:
            best_val_loss = val_loss
            del best_model
            best_model = copy.deepcopy(susage)
            best_val_index = epoch
            best_val_cnt = 0
        cur_test_loss = val_loss
        best_val_cnt += 1

        loss_train.append(cur_train_loss)
        loss_test.append(cur_test_loss)

        # print progress
        print('Epoch: ', epoch,
              '| train loss: %.8f' % cur_train_loss,
              '| test loss: %.8f' % cur_test_loss)
        if best_val_cnt > 10:  # early stopping: 10 epochs without improvement
            break

    f1_score_test = best_model.calculate_f1(feat_data, adjs_full, labels,
                                            test_nodes)
    return (best_model, loss_train[:best_val_index], loss_test[:best_val_index],
            loss_train_all, f1_score_test, grad_variance_all)
def main():
    logger = logging.getLogger('stdout')
    logger.setLevel(logging.DEBUG)
    hdlr = logging.StreamHandler(sys.stdout)
    logger.addHandler(hdlr)
    logger.debug('starting training')

    config_path = "../../../config/gcn.conf"
    conf = configparser.ConfigParser()
    conf.read(config_path)
    logger.debug(f'running on {device}')

    data_dir = conf['path']['gcn_data'] + '/data.pkl'
    if not os.path.exists(data_dir):
        pdb_data.main()
    try:
        with open(data_dir, 'rb') as f:
            data_dict = pickle.load(f)
        logger.debug('data successfully found')
    except OSError:
        logger.error(f'cannot find data at {data_dir}')
        raise

    vocab_path = conf['path']['vocab']
    with open(vocab_path, 'r') as of:
        aa_vocab = json.load(of)

    train_data, val_data, test_data, target_dim = split_data(
        data_dict, aa_vocab, 'mf')  # TODO: make the GO type configurable
    logger.debug(f'size of training set = {len(train_data)}')
    logger.debug(f'size of validation set = {len(val_data)}')
    logger.debug(f'# of go terms = {target_dim}')

    plt.figure(figsize=(10, 6))
    lm_model_paths = conf['path']['lm']
    if not isinstance(lm_model_paths, list):
        lm_model_paths = [lm_model_paths]
    for lm_model_path in lm_model_paths:
        language_model = torch.load(lm_model_path, map_location=device)
        lm_embedding = language_model['model_state_dict'][
            'module.word_embeddings.weight']
        num_trials = int(conf['model']['num_trials'])
        for i in range(num_trials):
            model_name = lm_model_path.split('/')[-1].split('.')[0] \
                + '_GCN' + '_iter_{}'.format(i)
            if conf['model']['model'] == 'GCN':
                model = GCN(target_dim, lm_embedding)
            elif conf['model']['model'] == 'Toy':
                model = Toy(target_dim, lm_embedding)
            model.to(device)
            logger.debug(f'training on {model_name}')
            train(logger, model, train_data, val_data, target_dim)
            model_dir = conf['path']['models']
            torch.save(model, model_dir + '/' + model_name + '.model')

            scores, labels = evaluate(model, val_data, target_dim)
            precision, recall, ap_score = compute_pr(scores, labels, target_dim)
            logger.debug('Average precision score for {}: {}'.format(
                model_name, ap_score))
            plt.step(recall, precision, where='post',
                     label='AP score for {}: {}'.format(model_name, ap_score))

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('micro-averaged PR curve over all classes')
    plt.legend()
    figure_dir = conf['path']['figures']
    plt.savefig(figure_dir + '/temp.png')
    logger.debug('plot saved to: ' + figure_dir + '/temp.png')
def main(args, print_fn=print):
    print_fn("Experiment arguments: {}".format(args))

    if args.random_seed:
        torch.manual_seed(args.random_seed)
    else:
        torch.manual_seed(123)

    # load dataset
    if args.dataset.startswith('ogbl'):
        graph, split_edge = load_ogb_dataset(args.dataset)
    else:
        raise NotImplementedError
    num_nodes = graph.num_nodes()

    # set device
    if args.gpu_id >= 0 and torch.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu_id)
    else:
        device = 'cpu'

    # ogbl-collab is a multi-edge graph, so its edges must be coalesced
    use_coalesce = args.dataset == 'ogbl-collab'

    # generate positive/negative edges with labels, then sample subgraphs
    # and compute node-labeling features
    seal_data = SEALData(g=graph, split_edge=split_edge, hop=args.hop,
                         neg_samples=args.neg_samples,
                         subsample_ratio=args.subsample_ratio,
                         use_coalesce=use_coalesce, prefix=args.dataset,
                         save_dir=args.save_dir, num_workers=args.num_workers,
                         print_fn=print_fn)
    node_attribute = seal_data.ndata['feat']
    edge_weight = seal_data.edata['weight'].float()

    train_data = seal_data('train')
    val_data = seal_data('valid')
    test_data = seal_data('test')
    train_graphs = len(train_data.graph_list)

    # set data loaders
    train_loader = GraphDataLoader(train_data, batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    val_loader = GraphDataLoader(val_data, batch_size=args.batch_size,
                                 num_workers=args.num_workers)
    test_loader = GraphDataLoader(test_data, batch_size=args.batch_size,
                                  num_workers=args.num_workers)

    # set model
    if args.model == 'gcn':
        model = GCN(num_layers=args.num_layers,
                    hidden_units=args.hidden_units,
                    gcn_type=args.gcn_type,
                    pooling_type=args.pooling,
                    node_attributes=node_attribute,
                    edge_weights=edge_weight,
                    node_embedding=None,
                    use_embedding=True,
                    num_nodes=num_nodes,
                    dropout=args.dropout)
    elif args.model == 'dgcnn':
        model = DGCNN(num_layers=args.num_layers,
                      hidden_units=args.hidden_units,
                      k=args.sort_k,
                      gcn_type=args.gcn_type,
                      node_attributes=node_attribute,
                      edge_weights=edge_weight,
                      node_embedding=None,
                      use_embedding=True,
                      num_nodes=num_nodes,
                      dropout=args.dropout)
    else:
        raise ValueError('unknown model: {}'.format(args.model))
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    loss_fn = BCEWithLogitsLoss()
    print_fn("Total parameters: {}".format(
        sum(p.numel() for p in model.parameters())))

    # train and evaluate loop
    summary_val = []
    summary_test = []
    for epoch in range(args.epochs):
        start_time = time.time()
        loss = train(model=model, dataloader=train_loader, loss_fn=loss_fn,
                     optimizer=optimizer, device=device,
                     num_graphs=args.batch_size, total_graphs=train_graphs)
        train_time = time.time()
        if epoch % args.eval_steps == 0:
            val_pos_pred, val_neg_pred = evaluate(model=model,
                                                  dataloader=val_loader,
                                                  device=device)
            test_pos_pred, test_neg_pred = evaluate(model=model,
                                                    dataloader=test_loader,
                                                    device=device)
            val_metric = evaluate_hits(args.dataset, val_pos_pred,
                                       val_neg_pred, args.hits_k)
            test_metric = evaluate_hits(args.dataset, test_pos_pred,
                                        test_neg_pred, args.hits_k)
            evaluate_time = time.time()
            print_fn("Epoch-{}, train loss: {:.4f}, hits@{}: val-{:.4f}, "
                     "test-{:.4f}, cost time: train-{:.1f}s, total-{:.1f}s".format(
                         epoch, loss, args.hits_k, val_metric, test_metric,
                         train_time - start_time, evaluate_time - start_time))
            summary_val.append(val_metric)
            summary_test.append(test_metric)

    summary_test = np.array(summary_test)
    print_fn("Experiment Results:")
    print_fn("Best hits@{}: {:.4f}, epoch: {}".format(
        args.hits_k, np.max(summary_test), np.argmax(summary_test)))
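# A hypothetical argument parser for main() above; the flag names mirror the
# fields read from `args`, but every default value is an illustrative guess
# rather than a value taken from the source.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='SEAL link prediction')
    parser.add_argument('--dataset', type=str, default='ogbl-collab')
    parser.add_argument('--model', type=str, default='dgcnn',
                        choices=['gcn', 'dgcnn'])
    parser.add_argument('--gcn_type', type=str, default='gcn')
    parser.add_argument('--pooling', type=str, default='sum')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_units', type=int, default=32)
    parser.add_argument('--sort_k', type=int, default=30)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--hop', type=int, default=1)
    parser.add_argument('--neg_samples', type=int, default=1)
    parser.add_argument('--subsample_ratio', type=float, default=0.1)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--epochs', type=int, default=60)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--hits_k', type=int, default=50)
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--save_dir', type=str, default='./processed')
    parser.add_argument('--random_seed', type=int, default=2021)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())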
# Load data
adj, features, labels, idx_train, idx_val, idx_test = load_data()

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            nhid=16,
            nclass=labels.max().item() + 1,
            dropout=0.5)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
features = features.to(device)
adj = adj.to(device)
labels = labels.to(device)
idx_train = idx_train.to(device)
idx_val = idx_val.to(device)
idx_test = idx_test.to(device)


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    # assumed continuation of the truncated function: report validation
    # metrics from the same forward pass
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))
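# A minimal driver for train() above; the epoch count (200) is an
# illustrative choice, not a value from the source.
t_total = time.time()
for epoch in range(200):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))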
# build sparse feature and support (normalized adjacency) tensors from
# (indices, values, shape) triplets
i = torch.from_numpy(features[0]).long().to(device)
v = torch.from_numpy(features[1]).to(device)
feature = torch.sparse.FloatTensor(i.t(), v, features[2]).to(device)

i = torch.from_numpy(supports[0]).long().to(device)
v = torch.from_numpy(supports[1]).to(device)
support = torch.sparse.FloatTensor(i.t(), v, supports[2]).float().to(device)

print('x :', feature)
print('sp:', support)
num_features_nonzero = feature._nnz()
feat_dim = feature.shape[1]

net = GCN(feat_dim, num_classes, num_features_nonzero)
net.to(device)
optimizer = optim.Adam(net.parameters(), lr=args.learning_rate)

net.train()
for epoch in range(args.epochs):
    out = net((feature, support))
    out = out[0]
    loss = masked_loss(out, train_label, train_mask)
    loss += args.weight_decay * net.l2_loss()
    acc = masked_acc(out, train_label, train_mask)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
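# Evaluation sketch under the same masking convention as the training loop;
# test_label and test_mask are assumed to exist alongside train_label/train_mask.
net.eval()
with torch.no_grad():
    out = net((feature, support))[0]
    test_loss = masked_loss(out, test_label, test_mask)
    test_acc = masked_acc(out, test_label, test_mask)
print('test loss: %.4f | test acc: %.4f' % (test_loss.item(), test_acc.item()))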
seed = 2020
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)
plt.rcParams['font.sans-serif'] = ['simhei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # render minus signs correctly

# data
train_loader, test_loader = get_loader('PEMS04')

gcn = GCN(6, 6, 1)
chebnet = ChebNet(6, 6, 1, 1)
gat = GAT(6, 6, 1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
models = [chebnet.to(device), gcn.to(device), gat.to(device)]

all_predict_values = []
epochs = 30
for i in range(len(models)):
    model = models[i]
    criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(params=model.parameters(), lr=3e-2)
    model.train()
    for epoch in range(epochs):
        epoch_loss, epoch_mae, epoch_rmse, epoch_mape = 0.0, 0.0, 0.0, 0.0
        num = 0
        start_time = time.time()
        for data in train_loader:
            # {"graph": [B, N, N], "flow_x": [B, N, H, D], "flow_y": [B, N, 1, D]}
            data['graph'] = data['graph'].to(device)
            data['flow_x'] = data['flow_x'].to(device)
            data['flow_y'] = data['flow_y'].to(device)
            predict_value = model(data)  # predictions in [0, 1]; recover to the real scale for metrics
            # assumed continuation of the truncated loop body: MSE on the
            # normalized flow, then the usual optimizer step
            loss = criterion(predict_value, data['flow_y'])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            num += 1
        print('model %d | epoch %d | mean loss %.6f | time %.1fs'
              % (i, epoch, epoch_loss / max(num, 1), time.time() - start_time))
if device == "cuda":
    torch.cuda.manual_seed(seed)

# load dataset
A, X, y, train_idx, val_idx, test_idx = load_data(dname, dtype)
dfeat = X.shape[1]
nclass = y.shape[1]
val_window = []

# define model & optimizer
model = GCN(dfeat, nhid, nclass, dropout)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

# move model and data to the target device
model = model.to(device)
X = X.to(device)
A = A.to(device)
y = y.to(device)
train_idx = train_idx.to(device)
val_idx = val_idx.to(device)
test_idx = test_idx.to(device)

print("Start training the model.............")
start = time.time()
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X, A)
    train_loss = F.nll_loss(output[train_idx], torch.max(y[train_idx], 1)[1])
    train_accuracy = accuracy(output[train_idx], torch.max(y[train_idx], 1)[1])
    # assumed continuation of the truncated loop: standard backward pass and update
    train_loss.backward()
    optimizer.step()
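    # Validation sketch for the loop above; val_window is assumed to collect
    # recent validation losses for a window-based early-stopping check.
    model.eval()
    with torch.no_grad():
        output = model(X, A)
        val_loss = F.nll_loss(output[val_idx], torch.max(y[val_idx], 1)[1])
        val_acc = accuracy(output[val_idx], torch.max(y[val_idx], 1)[1])
    val_window.append(val_loss.item())
    print('epoch %d | val loss %.4f | val acc %.4f'
          % (epoch, val_loss.item(), val_acc))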
class ModelRunner:
    def __init__(self, params, logger, data_logger=None, epochs_logger=None):
        self._logger = logger
        self._epoch_logger = epochs_logger
        self._data_logger = EmptyLogger() if data_logger is None else data_logger
        self._parameters = params
        self._lr = params["lr"]
        self._is_nni = params['is_nni']
        self._device = torch.device('cuda') if torch.cuda.is_available() \
            else torch.device('cpu')
        self._mse_loss = self.weighted_mse_loss
        self._temporal_loss = self.weighted_mse_loss
        self.model = GCN(num_of_features=self._parameters["feature_matrices"][0].shape[1],
                         hid_size=self._parameters["hid_size"],
                         num_of_classes=self._parameters["number_of_classes"],
                         activation=self._parameters["activation"],
                         dropout=self._parameters["dropout"])
        self.model = self.model.to(self._device)
        self.opt = self._parameters["optimizer"](self.model.parameters(),
                                                 lr=self._parameters['lr'],
                                                 weight_decay=self._parameters['weight_decay'])

    @property
    def logger(self):
        return self._logger

    @property
    def data_logger(self):
        return self._data_logger

    def weighted_mse_loss(self, pred, target, weights=None):
        # unweighted squared error, summed over classes and samples
        if weights is None:
            return ((pred - target) ** 2).sum(dim=1).sum().to(self._device)
        elif self._parameters['loss_weights_type'] == 'sqrt(N/Nj)':
            weights = torch.tensor(weights).to(device=self._device,
                                               dtype=torch.float)
            return (torch.sqrt(weights.sum() / weights)
                    * (pred - target) ** 2).sum(dim=1).sum().to(self._device)
        elif self._parameters['loss_weights_type'] == '1/Njs':
            weights = torch.tensor(weights).to(device=self._device,
                                               dtype=torch.float)
            return ((1. / weights)
                    * (pred - target) ** 2).sum(dim=1).sum().to(self._device)

    def run(self):
        train_results_l = []
        test_results = []
        # train
        for epoch in range(self._parameters["epochs"]):
            train_results = self.train(epoch)
            train_results_l.append(train_results)
            if epoch == self._parameters["epochs"] - 1:  # for grid search
                test_res = self.test(epoch)
                test_results.append(test_res)
                if self._is_nni:
                    nni.report_intermediate_result(test_res["f1_score_macro"][-1])
                else:
                    print(self._parameters["it_num"], self._parameters["iterations"],
                          epoch + 1, self._parameters["epochs"],
                          self._parameters["lr"], self._parameters["dropout"],
                          self._parameters["hid_size"],
                          self._parameters["weight_decay"],
                          self._parameters["temporal_pen"],
                          train_results['loss'].item(),
                          train_results['tempo_loss'].item(),
                          train_results['loss'].item() + train_results['tempo_loss'].item(),
                          train_results['f1_score_macro'][-1],
                          train_results['f1_score_micro'][-1],
                          test_res["loss"], test_res["tempo_loss"],
                          test_res["loss"] + test_res["tempo_loss"],
                          test_res["f1_score_macro"][-1],
                          test_res["f1_score_micro"][-1])
            self._logger.debug('Epoch: {:04d} '.format(epoch + 1) +
                               'lr: {:04f} '.format(self._parameters['lr']) +
                               'dropout: {:04f} '.format(self._parameters['dropout']) +
                               'hid_size: {:04f} '.format(self._parameters['hid_size']) +
                               'weight_decay: {:04f} '.format(self._parameters['weight_decay']) +
                               'temporal_pen: {:04f} '.format(self._parameters['temporal_pen']) +
                               'reg_loss_train: {:.4f} '.format(train_results['loss']) +
                               'temp_loss: {:.4f} '.format(train_results['tempo_loss']))
        result = self.test('test', print_to_file=True)
        if self._is_nni:
            nni.report_final_result(result["f1_score_macro"])
        return train_results_l, test_results, result, self._parameters

    def train(self, epoch):
        z_vals, outputs = [], []
        labeled_indices = self._parameters['training_inds']
        labels = self._parameters['training_labels']
        tempo_loss = 0.
        loss_train = 0.
        self.model.train()
        self.opt.zero_grad()
        # Nj_s are per-class label counts across all years, used as class
        # weights in the loss; pass None instead to disable weighting
        Nj_s = [sum([labels[u][t][j]
                     for u in range(len(self._parameters["adj_matrices"]))
                     for t in range(len(labels[u]))])
                for j in range(self._parameters['number_of_classes'])]
        for idx, adj in enumerate(self._parameters["adj_matrices"]):
            input_features = torch.from_numpy(self._parameters["feature_matrices"][idx]) \
                .to(dtype=torch.float, device=self._device)
            z, output = self.model(input_features, adj)
            output = output[labeled_indices[idx], :]
            loss_train += self._mse_loss(output, labels[idx], Nj_s)
            # loss_train += self._mse_loss(output, labels[idx].float())  # unweighted variant
            z_vals.append(z)        # after 1 GCN layer
            outputs.append(output)  # final predictions

        # temporal smoothness: penalize embedding drift for persons that
        # appear in two consecutive years
        z_appearances = 0.
        for t in range(len(z_vals) - 1):
            t_inds = self._parameters['training_inds'][t]
            t_plus_one_inds = self._parameters['training_inds'][t + 1]
            z_inds = [i for i in t_inds if i in t_plus_one_inds]
            z_appearances += len(z_inds)
            z_val_t = z_vals[t][z_inds, :]
            z_val_t_plus_1 = z_vals[t + 1][z_inds, :]
            loss = self._temporal_loss(z_val_t_plus_1, z_val_t)
            tempo_loss += self._parameters["temporal_pen"] * loss

        tempo_loss /= z_appearances
        loss_train /= sum([len(labeled_indices[u]) for u in range(len(outputs))])
        total_loss = loss_train + tempo_loss
        total_loss.backward()
        self.opt.step()

        f1_score_macro, f1_score_micro = [], []
        if epoch == self._parameters['epochs'] - 1:
            for i in range(len(labels)):
                f1_mac, f1_mic, list_real, list_pred = self.accuracy_f1_score(
                    outputs[i], labels[i])
                f1_score_macro.append(f1_mac)
                f1_score_micro.append(f1_mic)
        result = {"loss": loss_train,
                  "f1_score_macro": f1_score_macro,
                  "f1_score_micro": f1_score_micro,
                  "tempo_loss": tempo_loss}
        return result

    def test(self, epoch, print_to_file=False):
        z_vals, outputs = [], []
        labeled_indices = self._parameters['test_inds']
        labels = self._parameters['test_labels']
        tempo_loss = 0.
        loss_test = 0.
        test_z_appearances = 0.
        self.model.eval()
        for idx, adj in enumerate(self._parameters["adj_matrices"]):
            test_mat = torch.from_numpy(self._parameters["feature_matrices"][idx]) \
                .to(dtype=torch.float, device=self._device)
            z, output = self.model(test_mat, adj)
            output = output[labeled_indices[idx], :]
            loss_test += self._mse_loss(output, labels[idx].float())
            z_vals.append(z)
            outputs.append(output)

        if print_to_file:
            grid_outputs_folder = str(self._parameters['name'])
            self._logger.debug("\nprint to files")
            for i in range(len(self._parameters["adj_matrices"])):
                np_output = outputs[i].cpu().data.numpy()
                products_path = os.path.join(os.getcwd(), 'dataset',
                                             self._parameters["dataset_name"],
                                             "gcn_outputs", grid_outputs_folder)
                if not os.path.exists(products_path):
                    os.makedirs(products_path)
                with open(os.path.join("dataset", self._parameters["dataset_name"],
                                       "gcn_outputs", grid_outputs_folder,
                                       "gcn_" + str(i) + ".pkl"), "wb") as f:
                    pickle.dump(np_output, f, protocol=pickle.HIGHEST_PROTOCOL)

        for t in range(len(z_vals) - 1):
            t_inds = self._parameters['test_inds'][t]
            t_plus_one_inds = self._parameters['test_inds'][t + 1]
            z_inds = [i for i in t_inds if i in t_plus_one_inds]
            test_z_appearances += len(z_inds)
            z_val_t = z_vals[t][z_inds, :]
            z_val_t_plus_1 = z_vals[t + 1][z_inds, :]
            tempo_loss += self._parameters["temporal_pen"] * \
                self._temporal_loss(z_val_t_plus_1, z_val_t)

        tempo_loss /= test_z_appearances
        loss_test /= sum([len(labeled_indices[u]) for u in range(len(outputs))])

        f1_score_macro, f1_score_micro = [], []
        real, pred = [], []
        if epoch == self._parameters['epochs'] - 1 or epoch == 'test':
            for i in range(len(labels)):  # iterate over the years
                f1_mac, f1_mic, list_real, list_pred = self.accuracy_f1_score(
                    outputs[i], labels[i])
                f1_score_macro.append(f1_mac)
                f1_score_micro.append(f1_mic)
                real.extend(list_real)
                pred.extend(list_pred)
            # confusion matrix over all years, row-normalized, for the final test
            self.confusion_matrix(real, pred)
        result = {"loss": loss_test.data.item(),
                  "f1_score_macro": f1_score_macro,
                  "f1_score_micro": f1_score_micro,
                  "tempo_loss": tempo_loss.data.item()}
        return result

    def accuracy_f1_score(self, output, labels):
        pred, real = [], []
        for person in range(labels.size(0)):  # iterate over all persons
            for label in range(labels.size(1)):
                if labels[person, label] == 0:
                    continue
                argmax = output[person].max(0)[1]
                real.append(label)
                pred.append(argmax.cpu().item())
        f1_macro = f1_score(real, pred, average='macro')
        f1_micro = f1_score(real, pred, average='micro')
        return f1_macro, f1_micro, real, pred

    def confusion_matrix(self, list_real, list_pred):
        n_classes = self._parameters["number_of_classes"]
        matrix = np.zeros((n_classes, n_classes))  # classes x classes
        for i in range(len(list_pred)):
            matrix[list_real[i], list_pred[i]] += 1
        # row-normalize so each real class sums to 1
        row_sums = matrix.sum(axis=1, dtype='float')
        new_matrix = np.zeros((n_classes, n_classes))
        for i, (row, row_sum) in enumerate(zip(matrix, row_sums)):
            if row_sum == 0:
                new_matrix[i, :] = 0
            else:
                new_matrix[i, :] = row / row_sum
        new_matrix = np.around(new_matrix, 3)

        b = np.asarray(new_matrix)
        self._parameters['diag_sum'] = np.trace(b)
        self._parameters['diag_elements'] = np.diagonal(b)
        print('Diagonal (sum): ', np.trace(b))
        print('Diagonal (elements): ', np.diagonal(b))

        fig = plt.figure()
        ax = fig.add_subplot(111)
        cax = ax.matshow(new_matrix, interpolation='nearest')
        fig.colorbar(cax)
        ax.set_yticks(np.arange(n_classes))
        ax.set_yticklabels([str(i) for i in range(n_classes)])
        ax.set_xticks(np.arange(n_classes))
        ax.set_xticklabels([str(i) for i in range(n_classes)])
        ax.tick_params(axis='y', labelsize=7)
        ax.tick_params(axis='x', labelsize=7, labelbottom=True, labeltop=False)
        plt.title('Confusion matrix')
        ax.axis('image')
        plt.xlabel("Predicted label")
        plt.ylabel("Real label")

        mypath = "./dataset/" + self._parameters["dataset_name"] + "/figures"
        if not os.path.exists(mypath):
            os.makedirs(mypath)
        plt.savefig(mypath + "/confusion_matrix" + str(self._parameters['name'])
                    + time.strftime("%Y%m%d_%H%M%S") + ".png")
        plt.clf()
        plt.close()
        return
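# Standalone sanity check of the 'sqrt(N/Nj)' weighting used by
# weighted_mse_loss above, on dummy tensors; `counts` plays the role of the
# per-class Nj_s weights, and all shapes/values are illustrative.
import torch

pred = torch.rand(4, 3)                 # 4 samples, 3 classes
target = torch.rand(4, 3)
counts = torch.tensor([10., 5., 1.])    # per-class label counts (Nj_s)
w = torch.sqrt(counts.sum() / counts)   # rare classes get larger weight
loss = (w * (pred - target) ** 2).sum(dim=1).sum()
print('weighted MSE:', loss.item())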
def train_gcn(training_features, training_adjs, training_labels, eval_features,
              eval_adjs, eval_labels, params, class_weights, activations,
              unary, coeffs, graph_params):
    device = torch.device('cuda:1') if torch.cuda.is_available() \
        else torch.device('cpu')
    gcn = GCN(n_features=training_features[0].shape[1],
              hidden_layers=params["hidden_layers"],
              dropout=params["dropout"],
              activations=activations,
              p=graph_params["probability"],
              normalization=params["edge_normalization"])
    gcn.to(device)
    opt = params["optimizer"](gcn.parameters(), lr=params["lr"],
                              weight_decay=params["regularization"])

    n_training_graphs = len(training_labels)
    graph_size = graph_params["vertices"]
    n_eval_graphs = len(eval_labels)

    counter = 0  # for early stopping
    min_loss = None
    for epoch in range(params["epochs"]):
        # -------------------------- TRAINING --------------------------
        training_graphs_order = np.arange(n_training_graphs)
        np.random.shuffle(training_graphs_order)
        for i, idx in enumerate(training_graphs_order):
            training_mat = torch.tensor(training_features[idx], device=device)
            training_adj, training_lbs = map(
                lambda x: torch.tensor(data=x[idx], dtype=torch.double,
                                       device=device),
                [training_adjs, training_labels])
            gcn.train()
            opt.zero_grad()
            output_train = gcn(training_mat, training_adj)
            output_matrix_flat = (torch.mm(output_train,
                                           output_train.transpose(0, 1))
                                  + 1 / 2).flatten()
            training_criterion = gcn_build_weighted_loss(unary, class_weights,
                                                         training_lbs)
            loss_train = \
                coeffs[0] * training_criterion(
                    output_train.view(output_train.shape[0]), training_lbs) + \
                coeffs[1] * gcn_pairwise_loss(output_matrix_flat,
                                              training_adj.flatten()) + \
                coeffs[2] * gcn_binomial_reg(output_train, graph_params)
            loss_train.backward()
            opt.step()

        # -------------------------- EVALUATION --------------------------
        graphs_order = np.arange(n_eval_graphs)
        np.random.shuffle(graphs_order)
        outputs = torch.zeros(graph_size * n_eval_graphs, dtype=torch.double)
        output_xs = torch.zeros(graph_size ** 2 * n_eval_graphs,
                                dtype=torch.double)
        adj_flattened = torch.tensor(
            np.hstack([eval_adjs[idx].flatten() for idx in graphs_order]))
        for i, idx in enumerate(graphs_order):
            eval_mat = torch.tensor(eval_features[idx], device=device)
            eval_adj, eval_lbs = map(
                lambda x: torch.tensor(data=x[idx], dtype=torch.double,
                                       device=device),
                [eval_adjs, eval_labels])
            gcn.eval()
            output_eval = gcn(eval_mat, eval_adj)
            output_matrix_flat = (torch.mm(output_eval,
                                           output_eval.transpose(0, 1))
                                  + 1 / 2).flatten()
            output_xs[i * graph_size ** 2:(i + 1) * graph_size ** 2] = \
                output_matrix_flat.cpu()
            outputs[i * graph_size:(i + 1) * graph_size] = \
                output_eval.view(output_eval.shape[0]).cpu()
        all_eval_labels = torch.tensor(
            np.hstack([eval_labels[idx] for idx in graphs_order]),
            dtype=torch.double)
        eval_criterion = gcn_build_weighted_loss(unary, class_weights,
                                                 all_eval_labels)
        loss_eval = (coeffs[0] * eval_criterion(outputs, all_eval_labels) +
                     coeffs[1] * gcn_pairwise_loss(output_xs, adj_flattened) +
                     coeffs[2] * gcn_binomial_reg(outputs, graph_params)).item()

        if min_loss is None:
            current_min_loss = loss_eval
        else:
            current_min_loss = min(min_loss, loss_eval)

        # check for early stopping during training
        if epoch >= 10 and params["early_stop"]:
            if min_loss is None:
                min_loss = current_min_loss
                torch.save(gcn.state_dict(), "tmp_time.pt")  # save the best state
            elif loss_eval < min_loss:
                min_loss = current_min_loss
                torch.save(gcn.state_dict(), "tmp_time.pt")  # save the best state
                counter = 0
            else:
                counter += 1
                if counter >= 40:  # patience
                    break

    # after early stopping, reload the model with the best eval loss
    # (guarded, since no checkpoint exists when early stopping is disabled)
    if params["early_stop"] and os.path.exists("tmp_time.pt"):
        gcn.load_state_dict(torch.load("tmp_time.pt"))
        os.remove("tmp_time.pt")
    return gcn
def main(args):
    # 0. initial setting
    # set environment
    cudnn.benchmark = True
    os.makedirs(os.path.join(args.path, 'ckpt', args.name), exist_ok=True)
    os.makedirs(os.path.join(args.path, 'results', args.name, 'log'),
                exist_ok=True)

    # set logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(message)s')
    handler = logging.FileHandler(
        os.path.join(args.path, "results/{}/log/{}.log".format(
            args.name, time.strftime('%c', time.localtime(time.time())))))
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.addHandler(logging.StreamHandler())
    args.logger = logger

    # set cuda
    if torch.cuda.is_available():
        args.logger.info("running on cuda")
        args.device = torch.device("cuda")
        args.use_cuda = True
    else:
        args.logger.info("running on cpu")
        args.device = torch.device("cpu")
        args.use_cuda = False

    args.logger.info("[{}] starts".format(args.name))

    # 1. load data
    adj, features, labels, idx_train, idx_val, idx_test = load_data()

    # 2. setup
    CORA_NODES = 2708
    CORA_FEATURES = 1433
    CORA_CLASSES = 7
    CITESEER_NODES = 3327
    CITESEER_FEATURES = 3703
    CITESEER_CLASSES = 6
    (num_nodes, feature_dim, classes) = \
        (CORA_NODES, CORA_FEATURES, CORA_CLASSES) if args.dataset == 'cora' \
        else (CITESEER_NODES, CITESEER_FEATURES, CITESEER_CLASSES)

    args.logger.info("setting up...")
    if args.model == 'gcn':
        model = GCN(args, feature_dim, args.hidden, classes, args.dropout)
    else:
        model = SpGAT(args, feature_dim, args.hidden, classes, args.dropout,
                      args.alpha, args.n_heads)
    model.to(args.device)
    loss_fn = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)

    if args.load:
        loaded_data = load(args, args.ckpt)
        model.load_state_dict(loaded_data['model'])
        optimizer.load_state_dict(loaded_data['optimizer'])
    # 3. train / test
    if not args.test:  # train
        args.logger.info("starting training")
        train_loss_meter = AverageMeter(args, name="Loss", save_all=True,
                                        x_label="epoch")
        val_acc_meter = AverageMeter(args, name="Val Acc", save_all=True,
                                     x_label="epoch")
        earlystop_listener = val_acc_meter.attach_combo_listener(
            (lambda prev, new: prev.max >= new.max), threshold=args.patience)
        steps = 1
        earlystop = False  # stays False when fastmode skips validation
        for epoch in range(1, 1 + args.epochs):
            spent_time = time.time()
            model.train()
            train_loss_tmp_meter = AverageMeter(args)
            if args.start_from_step is not None:
                if steps < args.start_from_step:
                    steps += 1
                    continue
            optimizer.zero_grad()
            batch = len(idx_train)
            output = model(features.to(args.device), adj.to(args.device))
            loss = loss_fn(output[idx_train], labels[idx_train].to(args.device))
            loss.backward()
            optimizer.step()
            train_loss_tmp_meter.update(loss, weight=batch)
            steps += 1
            train_loss_meter.update(train_loss_tmp_meter.avg)
            spent_time = time.time() - spent_time
            args.logger.info(
                "[{}] train loss: {:.3f} took {:.1f} seconds".format(
                    epoch, train_loss_tmp_meter.avg, spent_time))

            model.eval()
            spent_time = time.time()
            if not args.fastmode:
                with torch.no_grad():
                    output = model(features.to(args.device), adj.to(args.device))
                acc = accuracy(output[idx_val], labels[idx_val]) * 100.0
                val_acc_meter.update(acc)
                earlystop = earlystop_listener.listen()
                spent_time = time.time() - spent_time
                args.logger.info(
                    "[{}] val acc: {:2.1f} % took {:.1f} seconds".format(
                        epoch, acc, spent_time))

            if steps % args.save_period == 0:
                save(args, "epoch{}".format(epoch), {'model': model.state_dict()})
            train_loss_meter.plot(scatter=False)
            val_acc_meter.plot(scatter=False)
            val_acc_meter.save()
            if earlystop:
                break
    else:  # test
        args.logger.info("starting test")
        model.eval()
        spent_time = time.time()
        with torch.no_grad():
            output = model(features.to(args.device), adj.to(args.device))
        acc = accuracy(output[idx_test], labels[idx_test]) * 100.0
        spent_time = time.time() - spent_time
        args.logger.info("test acc: {:2.1f} % took {:.1f} seconds".format(
            acc, spent_time))