def pipeline(subgraph_max_nodes):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    data = dataset[0]
    node_indices = torch.where(data.test_mask * data.y != 0)[0]

    gnnNets = GnnNets_NC(input_dim, output_dim, model_args)
    checkpoint = torch.load(mcts_args.explain_model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join('./results',
                            f"{mcts_args.dataset_name}"
                            f"_{model_args.model_name}"
                            f"_{reward_args.reward_method}")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    plotutils = PlotUtils(dataset_name=data_args.dataset_name)

    fidelity_score_list = []
    sparsity_score_list = []
    for node_idx in tqdm(node_indices):
        # find the paths and build the graph
        result_path = os.path.join(save_dir, f"node_{node_idx}_score.pt")

        # get data and prediction
        logits, prob, _ = gnnNets(data.clone())
        _, prediction = torch.max(prob, -1)
        prediction = prediction[node_idx].item()

        # build the graph for visualization
        graph = to_networkx(data, to_undirected=True)
        node_labels = {k: int(v) for k, v in enumerate(data.y)}
        nx.set_node_attributes(graph, node_labels, 'label')

        # searching using gnn score
        mcts_state_map = MCTS(node_idx=node_idx, ori_graph=graph,
                              X=data.x,
                              edge_index=data.edge_index,
                              num_hops=len(model_args.latent_dim),
                              n_rollout=mcts_args.rollout,
                              min_atoms=mcts_args.min_atoms,
                              c_puct=mcts_args.c_puct,
                              expand_atoms=mcts_args.expand_atoms)
        value_func = GnnNets_NC2value_func(gnnNets,
                                           node_idx=mcts_state_map.node_idx,
                                           target_class=prediction)
        score_func = reward_func(reward_args, value_func)
        mcts_state_map.set_score_func(score_func)

        # get searching result
        if os.path.isfile(result_path):
            gnn_results = torch.load(result_path)
        else:
            gnn_results = mcts_state_map.mcts(verbose=True)
            torch.save(gnn_results, result_path)
        tree_node_x = find_closest_node_result(gnn_results, subgraph_max_nodes)

        # calculate the metrics
        original_node_list = [i for i in tree_node_x.ori_graph.nodes]
        masked_node_list = [i for i in tree_node_x.ori_graph.nodes
                            if i not in tree_node_x.coalition or i == mcts_state_map.node_idx]
        original_score = gnn_score(original_node_list, tree_node_x.data,
                                   value_func=value_func,
                                   subgraph_building_method='zero_filling')
        masked_score = gnn_score(masked_node_list, tree_node_x.data,
                                 value_func=value_func,
                                 subgraph_building_method='zero_filling')
        sparsity_score = 1 - len(tree_node_x.coalition) / tree_node_x.ori_graph.number_of_nodes()

        fidelity_score_list.append(original_score - masked_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        subgraph_node_labels = nx.get_node_attributes(tree_node_x.ori_graph, name='label')
        subgraph_node_labels = torch.tensor([v for k, v in subgraph_node_labels.items()])
        plotutils.plot(tree_node_x.ori_graph, tree_node_x.coalition,
                       y=subgraph_node_labels,
                       node_idx=mcts_state_map.node_idx,
                       figname=os.path.join(save_dir, f"node_{node_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
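# A possible entry point for this script (an assumption, not part of the original):
# sweep a couple of maximum subgraph sizes and report the metrics averaged over the
# explained nodes returned by pipeline() above.
if __name__ == '__main__':
    for subgraph_max_nodes in [5, 10]:   # hypothetical size budgets
        fidelity_scores, sparsity_scores = pipeline(subgraph_max_nodes)
        print(f"max_nodes={subgraph_max_nodes}: "
              f"fidelity={fidelity_scores.mean().item():.4f}, "
              f"sparsity={sparsity_scores.mean().item():.4f}")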
from keras.applications.resnet50 import ResNet50
from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from pandas import DataFrame as df
from joblib import Parallel, delayed
from util import paths_to_tensor
import numpy as np
import multiprocessing

if __name__ == '__main__':
    """ Extract image features with the pretrained ResNet50 (parallel) - with data augmentation """
    model = ResNet50(include_top=False, weights='imagenet')

    print("Loading the data...")
    train_files, train_targets = get_dataset("train", 1, True)
    valid_files, valid_targets = get_dataset("val", 1, True)

    data_augmentation = True
    files_names = [["train_aug.csv", "train_target_aug.csv"],
                   ["val_aug.csv", "val_target_aug.csv"]]
    block_size = 500
    n_examples = [len(train_files), len(valid_files)]

    for i_file in [0]:
        files = [train_files, valid_files][i_file]
        target = [train_targets, valid_targets][i_file]

        print("Initializing the pre data augmentation...")
        datagen_train = ImageDataGenerator(
def pipeline(max_nodes):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices

    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(mcts_args.explain_model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join('./results',
                            f"{mcts_args.dataset_name}_"
                            f"{model_args.model_name}_"
                            f"{reward_args.reward_method}")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    fidelity_score_list = []
    sparsity_score_list = []
    for i in tqdm(data_indices):
        # get data and prediction
        data = dataset[i]
        _, probs, _ = gnnNets(Batch.from_data_list([data.clone()]))
        prediction = probs.squeeze().argmax(-1).item()
        original_score = probs.squeeze()[prediction]

        # get the reward func
        value_func = GnnNets_GC2value_func(gnnNets, target_class=prediction)
        payoff_func = reward_func(reward_args, value_func)

        # find the paths and build the graph
        result_path = os.path.join(save_dir, f"example_{i}.pt")

        # MCTS search for the l_shapley payoff
        mcts_state_map = MCTS(data.x, data.edge_index,
                              score_func=payoff_func,
                              n_rollout=mcts_args.rollout,
                              min_atoms=mcts_args.min_atoms,
                              c_puct=mcts_args.c_puct,
                              expand_atoms=mcts_args.expand_atoms)
        if os.path.isfile(result_path):
            results = torch.load(result_path)
        else:
            results = mcts_state_map.mcts(verbose=True)
            torch.save(results, result_path)

        # l_shapley score
        graph_node_x = find_closest_node_result(results, max_nodes=max_nodes)
        masked_node_list = [node for node in list(range(graph_node_x.data.x.shape[0]))
                            if node not in graph_node_x.coalition]
        fidelity_score = original_score - gnn_score(masked_node_list, data, value_func,
                                                    subgraph_building_method='zero_filling')
        sparsity_score = 1 - len(graph_node_x.coalition) / graph_node_x.ori_graph.number_of_nodes()

        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(i)]
            plotutils.plot(graph_node_x.ori_graph, graph_node_x.coalition, words=words,
                           figname=os.path.join(save_dir, f"example_{i}.png"))
        else:
            plotutils.plot(graph_node_x.ori_graph, graph_node_x.coalition, x=graph_node_x.data.x,
                           figname=os.path.join(save_dir, f"example_{i}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
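# Hedged illustration of the 'zero_filling' subgraph-building strategy passed to
# gnn_score above: node features outside the kept coalition are zeroed out before the
# model re-scores the graph. Toy tensors only; the real helper lives elsewhere and may
# differ in detail.
import torch

x = torch.arange(12.0).reshape(4, 3)          # 4 nodes, 3 features each
coalition = [0, 2]                            # nodes kept by the explanation
mask = torch.zeros(x.size(0), 1)
mask[coalition] = 1.0
masked_x = x * mask                           # rows of nodes 1 and 3 become zero
print(masked_x)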
state = torch.load("%s/%d.pth" % (opt.checkpoint_dir, opt.epoch))
model.load_state_dict(state.get('weight', False))
opt = state.get('opt')
opt.epoch = temp_opt.epoch

if opt.model_name == 'unet_nested':
    criterion = BCEDiceLoss()
elif opt.n_class > 1:
    criterion = nn.CrossEntropyLoss()
else:
    criterion = nn.BCEWithLogitsLoss()

optimizer = optim.AdamW(model.parameters())

dataloader = DataLoader(
    load_dataset.get_dataset(opt),
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=opt.n_cpu,
)

Tensor = torch.cuda.FloatTensor if cuda else torch.Tensor

log_list = []
for epoch in range(opt.epoch, opt.n_epochs):
    epoch_loss = 0
    num_batches = len(dataloader)
    for i, imgs in enumerate(dataloader):
        # Configure model input
        data = Variable(imgs["input"].type(Tensor))
        true_mask = Variable(imgs["gt"].type(Tensor))
def pipeline_NC(top_k):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    data = dataset[0]
    node_indices = torch.where(data.test_mask * data.y != 0)[0].tolist()

    gnnNets = GnnNets_NC(input_dim, output_dim, model_args)
    checkpoint = torch.load(model_args.model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join('./results',
                            f"{data_args.dataset_name}_"
                            f"{model_args.model_name}_"
                            f"pgexplainer")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    pgexplainer = PGExplainer(gnnNets)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    tic = time.perf_counter()

    pgexplainer.get_explanation_network(dataset, is_graph_classification=False)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    toc = time.perf_counter()
    training_duration = toc - tic
    print(f"training time is {training_duration}s")

    duration = 0.0
    data = dataset[0]
    fidelity_score_list = []
    sparsity_score_list = []
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    for ori_node_idx in tqdm(node_indices):
        tic = time.perf_counter()
        if glob.glob(os.path.join(save_dir, f"node_{ori_node_idx}.pt")):
            file = glob.glob(os.path.join(save_dir, f"node_{ori_node_idx}.pt"))[0]
            edge_mask, x, edge_index, y, subset = torch.load(file)
            edge_mask = torch.from_numpy(edge_mask)
            node_idx = int(torch.where(subset == ori_node_idx)[0])
            pred_label = pgexplainer.get_node_prediction(node_idx, x, edge_index)
        else:
            x, edge_index, y, subset, kwargs = \
                pgexplainer.get_subgraph(node_idx=ori_node_idx,
                                         x=data.x,
                                         edge_index=data.edge_index,
                                         y=data.y)
            node_idx = int(torch.where(subset == ori_node_idx)[0])
            edge_mask = pgexplainer.explain_edge_mask(x, edge_index)
            pred_label = pgexplainer.get_node_prediction(node_idx, x, edge_index)
            save_path = os.path.join(save_dir, f"node_{ori_node_idx}.pt")
            edge_mask = edge_mask.cpu()
            cache_list = [edge_mask.numpy(), x, edge_index, y, subset]
            torch.save(cache_list, save_path)
        duration += time.perf_counter() - tic

        sub_data = Data(x=x, edge_index=edge_index, y=y)
        graph = to_networkx(sub_data)

        fidelity_score = top_k_fidelity(sub_data, edge_mask, top_k, gnnNets, pred_label)
        sparsity_score = top_k_sparsity(sub_data, edge_mask, top_k)

        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        plotutils.plot_soft_edge_mask(graph, edge_mask, top_k,
                                      y=sub_data.y,
                                      node_idx=node_idx,
                                      un_directed=True,
                                      figname=os.path.join(save_dir, f"example_{ori_node_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
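# Hedged sketch of how a soft edge mask can be hardened into a top-k selection, which is
# the quantity that top_k_fidelity / top_k_sparsity above evaluate (their actual
# implementations are not shown here and may differ).
import torch

edge_mask = torch.tensor([0.90, 0.10, 0.70, 0.30, 0.05])
top_k = 2
kept = torch.zeros_like(edge_mask, dtype=torch.bool)
kept[edge_mask.topk(top_k).indices] = True    # keep the two highest-weight edges
sparsity = 1.0 - kept.float().mean().item()   # fraction of edges dropped: 0.6
print(kept, sparsity)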
def pipeline_GC(top_k):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
        pgexplainer_trainset = dataset
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices
        pgexplainer_trainset = loader['train'].dataset
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes

    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(model_args.model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join('./results',
                            f"{data_args.dataset_name}_"
                            f"{model_args.model_name}_"
                            f"pgexplainer")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    pgexplainer = PGExplainer(gnnNets)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    tic = time.perf_counter()

    pgexplainer.get_explanation_network(pgexplainer_trainset)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    toc = time.perf_counter()
    training_duration = toc - tic
    print(f"training time is {training_duration: .4}s")

    explain_duration = 0.0
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    fidelity_score_list = []
    sparsity_score_list = []
    for data_idx in tqdm(data_indices):
        data = dataset[data_idx]
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        tic = time.perf_counter()

        prob = pgexplainer.eval_probs(data.x, data.edge_index)
        pred_label = prob.argmax(-1).item()

        if glob.glob(os.path.join(save_dir, f"example_{data_idx}.pt")):
            file = glob.glob(os.path.join(save_dir, f"example_{data_idx}.pt"))[0]
            edge_mask = torch.from_numpy(torch.load(file))
        else:
            edge_mask = pgexplainer.explain_edge_mask(data.x, data.edge_index)
            save_path = os.path.join(save_dir, f"example_{data_idx}.pt")
            edge_mask = edge_mask.cpu()
            torch.save(edge_mask.detach().numpy(), save_path)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        toc = time.perf_counter()
        explain_duration += (toc - tic)

        graph = to_networkx(data)

        fidelity_score = top_k_fidelity(data, edge_mask, top_k, gnnNets, pred_label)
        sparsity_score = top_k_sparsity(data, edge_mask, top_k)

        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(data_idx)]
            plotutils.plot_soft_edge_mask(graph, edge_mask, top_k,
                                          x=data.x,
                                          words=words,
                                          un_directed=True,
                                          figname=os.path.join(save_dir, f"example_{data_idx}.png"))
        else:
            plotutils.plot_soft_edge_mask(graph, edge_mask, top_k,
                                          x=data.x,
                                          un_directed=True,
                                          figname=os.path.join(save_dir, f"example_{data_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
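# A possible entry point (an assumption about how this script is run, not part of the
# original): evaluate the PGExplainer explanations for a single edge budget and report
# the averaged metrics.
if __name__ == '__main__':
    top_k = 6   # hypothetical number of explanation edges to keep
    fidelity, sparsity = pipeline_GC(top_k)
    print(f"fidelity: {fidelity.mean().item():.4f}, sparsity: {sparsity.mean().item():.4f}")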
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from PIL import ImageFile
from joblib import Parallel, delayed
from util import paths_to_tensor
import numpy as np
import multiprocessing

if __name__ == "__main__":
    print("Loading the data...")
    valid_files, valid_targets = get_dataset("val")

    # pre-process the data for Keras
    print("Converting to tensors...")
    valid_tensors = paths_to_tensor(valid_files).astype('float32') / 255.0

    model = Sequential()
    model.add(ResNet50(weights='imagenet',
                       include_top=False,
                       input_shape=(224, 224, 3)))
    model.add(Flatten(name='flatten'))
    model.add(Dense(83, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
checkpoints = [f for f in glob.glob(os.path.join(opt.checkpoint_dir, '*.pth'))]

for checkpoint in checkpoints:
    print("Starting Checkpoint", checkpoint)
    state = torch.load(checkpoint)
    saved_opt = state.get('opt')
    model = load_model.load_model(saved_opt)
    model.load_state_dict(state.get('weight', False))
    if cuda:
        model.cuda()
    saved_opt.test = True

    dataloader = DataLoader(
        load_dataset.get_dataset(saved_opt),
        batch_size=saved_opt.batch_size,
        shuffle=False,
        num_workers=saved_opt.n_cpu,
    )

    for i, imgs in enumerate(dataloader):
        row = []
        # Configure model input
        data = imgs["input"].type(Tensor)
        true_mask = imgs["gt"].type(Tensor)

        model.eval()
        with torch.no_grad():
            predicted_mask = model(data)
            if saved_opt.deep_supervision:
def train_GC():
    # note: the multi-task setting is handled here
    print('start loading data====================')
    dataset = get_dataset(data_args)
    input_dim = dataset.num_node_features
    output_dim = int(dataset.num_classes)
    dataloader = get_dataloader(dataset, data_args, train_args)

    print('start training model==================')
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    gnnNets.to_device()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(gnnNets.parameters(),
                     lr=train_args.learning_rate,
                     weight_decay=train_args.weight_decay)

    avg_nodes = 0.0
    avg_edge_index = 0.0
    for i in range(len(dataset)):
        avg_nodes += dataset[i].x.shape[0]
        avg_edge_index += dataset[i].edge_index.shape[1]
    avg_nodes /= len(dataset)
    avg_edge_index /= len(dataset)
    print(f"graphs {len(dataset)}, avg_nodes {avg_nodes:.4f}, "
          f"avg_edge_index {avg_edge_index / 2:.4f}")

    best_acc = 0.0
    data_size = len(dataset)
    print(f'The total num of dataset is {data_size}')

    # save path for model
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    if not os.path.isdir(os.path.join('checkpoint', data_args.dataset_name)):
        os.mkdir(os.path.join('checkpoint', f"{data_args.dataset_name}"))
    ckpt_dir = f"./checkpoint/{data_args.dataset_name}/"

    early_stop_count = 0
    for epoch in range(train_args.max_epochs):
        acc = []
        loss_list = []
        gnnNets.train()
        for batch in dataloader['train']:
            logits, probs, _ = gnnNets(batch)
            loss = criterion(logits, batch.y)

            # optimization
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(gnnNets.parameters(), clip_value=2.0)
            optimizer.step()

            # record
            _, prediction = torch.max(logits, -1)
            loss_list.append(loss.item())
            acc.append(prediction.eq(batch.y).cpu().numpy())

        # report train msg
        print(f"Train Epoch: {epoch} | Loss: {np.average(loss_list):.3f} | "
              f"Acc: {np.concatenate(acc, axis=0).mean():.3f}")

        # report eval msg
        eval_state = evaluate_GC(dataloader['eval'], gnnNets, criterion)
        print(f"Eval Epoch: {epoch} | Loss: {eval_state['loss']:.3f} | Acc: {eval_state['acc']:.3f}")

        # only save the best model
        is_best = (eval_state['acc'] > best_acc)

        if eval_state['acc'] > best_acc:
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count > train_args.early_stopping:
            break

        if is_best:
            best_acc = eval_state['acc']
            early_stop_count = 0
        if is_best or epoch % train_args.save_epoch == 0:
            save_best(ckpt_dir, epoch, gnnNets, model_args.model_name, eval_state['acc'], is_best)

    print(f"The best validation accuracy is {best_acc}.")

    # report test msg
    checkpoint = torch.load(os.path.join(ckpt_dir, f'{model_args.model_name}_best.pth'))
    gnnNets.update_state_dict(checkpoint['net'])
    test_state, _, _ = test_GC(dataloader['test'], gnnNets, criterion)
    print(f"Test: | Loss: {test_state['loss']:.3f} | Acc: {test_state['acc']:.3f}")
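# Toy, self-contained illustration (assumed shapes) of the epoch-accuracy bookkeeping in
# train_GC above: per-batch correctness vectors are collected, concatenated once per
# epoch, and averaged.
import numpy as np
import torch

acc = []
for logits, y in [(torch.tensor([[2.0, 0.0], [0.0, 3.0]]), torch.tensor([0, 1])),
                  (torch.tensor([[1.0, 4.0]]), torch.tensor([0]))]:
    _, prediction = torch.max(logits, -1)
    acc.append(prediction.eq(y).cpu().numpy())
print(np.concatenate(acc, axis=0).mean())   # 2 of 3 predictions correct -> ~0.667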
def train_NC():
    print('start loading data====================')
    dataset = get_dataset(data_args)
    input_dim = dataset.num_node_features
    output_dim = int(dataset.num_classes)

    # save path for model
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    if not os.path.isdir(os.path.join('checkpoint', f"{data_args.dataset_name}")):
        os.mkdir(os.path.join('checkpoint', f"{data_args.dataset_name}"))
    ckpt_dir = f"./checkpoint/{data_args.dataset_name}/"

    data = dataset[0]
    gnnNets_NC = GnnNets_NC(input_dim, output_dim, model_args)
    gnnNets_NC.to_device()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(gnnNets_NC.parameters(),
                     lr=train_args.learning_rate,
                     weight_decay=train_args.weight_decay)

    best_val_loss = float('inf')
    best_acc = 0
    val_loss_history = []
    early_stop_count = 0
    for epoch in range(1, train_args.max_epochs + 1):
        gnnNets_NC.train()
        logits, prob, _ = gnnNets_NC(data)
        loss = criterion(logits[data.train_mask], data.y[data.train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        eval_info = evaluate_NC(data, gnnNets_NC, criterion)
        eval_info['epoch'] = epoch

        if eval_info['val_loss'] < best_val_loss:
            best_val_loss = eval_info['val_loss']
            val_acc = eval_info['val_acc']
        val_loss_history.append(eval_info['val_loss'])

        # only save the best model
        is_best = (eval_info['val_acc'] > best_acc)

        if eval_info['val_acc'] > best_acc:
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count > train_args.early_stopping:
            break

        if is_best:
            best_acc = eval_info['val_acc']
        if is_best or epoch % train_args.save_epoch == 0:
            save_best(ckpt_dir, epoch, gnnNets_NC, model_args.model_name, eval_info['val_acc'], is_best)

        print(f'Epoch {epoch}, Train Loss: {eval_info["train_loss"]:.4f}, '
              f'Train Accuracy: {eval_info["train_acc"]:.3f}, '
              f'Val Loss: {eval_info["val_loss"]:.3f}, '
              f'Val Accuracy: {eval_info["val_acc"]:.3f}')

    # report test msg
    checkpoint = torch.load(os.path.join(ckpt_dir, f'{model_args.model_name}_best.pth'))
    gnnNets_NC.update_state_dict(checkpoint['net'])
    eval_info = evaluate_NC(data, gnnNets_NC, criterion)
    print(f'Test Loss: {eval_info["test_loss"]:.4f}, Test Accuracy: {eval_info["test_acc"]:.3f}')
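# Toy illustration (assumed shapes) of the masked node-classification loss used in
# train_NC above: only the nodes selected by train_mask contribute to the cross-entropy.
import torch
import torch.nn as nn

logits = torch.randn(5, 3)                                   # 5 nodes, 3 classes
y = torch.tensor([0, 2, 1, 1, 0])
train_mask = torch.tensor([True, False, True, True, False])
loss = nn.CrossEntropyLoss()(logits[train_mask], y[train_mask])
print(loss.item())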
from keras.applications.resnet50 import ResNet50
from pandas import DataFrame as df
from joblib import Parallel, delayed
from util import paths_to_tensor
import numpy as np
import multiprocessing

if __name__ == '__main__':
    """ Extract image features with the pretrained ResNet50 (parallel) """
    model = ResNet50(include_top=False, weights='imagenet')

    print("Loading the data...")
    train_files, train_targets = get_dataset("train")
    valid_files, valid_targets = get_dataset("val")

    files_names = ["dataset_train3.csv", "dataset_val3.csv"]
    block_size = 500

    # 0 - train, 1 - validation
    for i_file in [0, 1]:
        files = [train_files, valid_files][i_file]
        count = int(np.ceil(len(files) / block_size))
        for i in range(count):
            print("{0} batch: {1}/{2}".format(files_names[i_file], i, count))
            begin = i * block_size