def get_roc_auc(trained_classifier_model, GNNgraph_list, dataset_features, cuda):
    trained_classifier_model.eval()
    score_list = []
    target_list = []

    if dataset_features["num_class"] > 2:
        print("Unable to calculate ROC AUC for multiclass dataset")
        return 0

    # Instead of sending the whole list as one batch, process graphs one by
    # one in case the classifier does not support batch processing
    # TODO: Enable batch processing support
    for GNNgraph in GNNgraph_list:
        node_feat, n2n, subg = graph_to_tensor(
            [GNNgraph], dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cuda)

        output = trained_classifier_model(node_feat, n2n, subg, [GNNgraph])
        logits = F.log_softmax(output, dim=1)
        prob = F.softmax(logits, dim=1)

        score_list.append(prob.cpu().detach())
        target_list.append(GNNgraph.label)

    # Score the positive class (column 1), hence the binary-only restriction
    score_list = torch.cat(score_list).cpu().numpy()
    score_list = score_list[:, 1]
    roc_auc = metrics.roc_auc_score(target_list, score_list, average='macro')

    return roc_auc
def get_accuracy(trained_classifier_model, GNNgraph_list, dataset_features, cuda):
    trained_classifier_model.eval()
    true_equal_pred_pairs = []

    # Instead of sending the whole list as one batch, process graphs one by
    # one in case the classifier does not support batch processing
    # TODO: Enable batch processing support
    for GNNgraph in GNNgraph_list:
        node_feat, n2n, subg = graph_to_tensor(
            [GNNgraph], dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cuda)

        output = trained_classifier_model(node_feat, n2n, subg, [GNNgraph])
        logits = F.log_softmax(output, dim=1)
        pred = logits.data.max(1, keepdim=True)[1]

        if GNNgraph.label == int(pred[0]):
            true_equal_pred_pairs.append(1)
        else:
            true_equal_pred_pairs.append(0)

    return sum(true_equal_pred_pairs) / len(true_equal_pred_pairs)
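# Hedged usage sketch (illustrative, not part of the original pipeline):
# how the two metric helpers above are meant to be called. `trained_model`,
# `test_graphs`, and `dataset_features` are hypothetical placeholders for
# objects produced elsewhere (e.g. by load_data.py); only the call
# signatures come from this file.
def _demo_metrics(trained_model, test_graphs, dataset_features):
    # get_roc_auc() only supports binary classification: it scores column 1
    # of the probability matrix and returns 0 when num_class > 2.
    roc_auc = get_roc_auc(trained_model, test_graphs, dataset_features, cuda=0)
    accuracy = get_accuracy(trained_model, test_graphs, dataset_features, cuda=0)
    print("ROC AUC: %0.5f, accuracy: %0.5f" % (roc_auc, accuracy))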
def DeepLIFT(classifier_model, config, dataset_features,
             GNNgraph_list, current_fold, cuda=0):
    '''
    :param classifier_model: trained classifier model
    :param config: parsed configuration file of config.yml
    :param dataset_features: a dictionary of dataset features obtained from load_data.py
    :param GNNgraph_list: a list of GNNgraphs obtained from the dataset
    :param current_fold: current fold of the k-fold run, used when retrieving isomorphic pairs
    :param cuda: whether to use GPU to perform conversion to tensor
    '''
    # Initialise settings
    interpretability_config = config["interpretability_methods"]["DeepLIFT"]

    # Perform DeepLIFT on the classifier model
    dl = DeepLift(classifier_model)

    output_for_metrics_calculation = []
    output_for_generating_saliency_map = {}

    # Obtain attribution scores for use in qualitative metrics
    tmp_timing_list = []
    for GNNgraph in GNNgraph_list:
        output = {'graph': GNNgraph}
        for _, label in dataset_features["label_dict"].items():
            # Relabel all graphs just in case; may relabel only those that
            # need relabelling if performance is poor
            original_label = GNNgraph.label
            GNNgraph.label = label

            node_feat, n2n, subg = graph_to_tensor(
                [GNNgraph], dataset_features["feat_dim"],
                dataset_features["edge_feat_dim"], cuda)

            start_generation = perf_counter()
            attribution = dl.attribute(
                node_feat,
                additional_forward_args=(n2n, subg, [GNNgraph]),
                target=label)
            tmp_timing_list.append(perf_counter() - start_generation)

            attribution_score = torch.sum(attribution, dim=1).tolist()
            attribution_score = standardize_scores(attribution_score)

            GNNgraph.label = original_label
            output[label] = attribution_score
        output_for_metrics_calculation.append(output)

    execution_time = sum(tmp_timing_list) / len(tmp_timing_list)

    # Obtain attribution scores for generating saliency maps,
    # compared against zero tensors
    if interpretability_config["compare_with_zero_tensor"] is True:
        if interpretability_config["sample_ids"] is not None:
            if ',' in str(interpretability_config["sample_ids"]):
                sample_graph_id_list = list(
                    map(int, interpretability_config["sample_ids"].split(',')))
            else:
                sample_graph_id_list = [int(interpretability_config["sample_ids"])]

            output_for_generating_saliency_map.update(
                {"deeplift_zero_tensor_class_%s" % str(label): []
                 for _, label in dataset_features["label_dict"].items()})

            for index in range(len(output_for_metrics_calculation)):
                tmp_output = output_for_metrics_calculation[index]
                tmp_label = tmp_output['graph'].label
                if tmp_output['graph'].graph_id in sample_graph_id_list:
                    element_name = "deeplift_zero_tensor_class_%s" % str(tmp_label)
                    output_for_generating_saliency_map[element_name].append(
                        (tmp_output['graph'], tmp_output[tmp_label]))

        elif interpretability_config["number_of_zero_tensor_samples"] > 0:
            # Randomly sample from the existing list
            graph_idxes = list(range(len(output_for_metrics_calculation)))
            random.shuffle(graph_idxes)
            output_for_generating_saliency_map.update(
                {"deeplift_zero_tensor_class_%s" % str(label): []
                 for _, label in dataset_features["label_dict"].items()})

            # Begin appending found samples
            for index in graph_idxes:
                tmp_label = output_for_metrics_calculation[index]['graph'].label
                element_name = "deeplift_zero_tensor_class_%s" % str(tmp_label)
                if len(output_for_generating_saliency_map[element_name]) < \
                        interpretability_config["number_of_zero_tensor_samples"]:
                    output_for_generating_saliency_map[element_name].append(
                        (output_for_metrics_calculation[index]['graph'],
                         output_for_metrics_calculation[index][tmp_label]))

    # Obtain attribution scores for generating saliency maps,
    # compared against isomorphic samples
interpretability_config["compare_with_isomorphic_samples"] is True: if dataset_features["num_class"] != 2: print("DeepLIFT.py: Comparing with isomorphic samples is only possible in binary classification tasks.") else: # Get all isomorphic pairs class_0_graphs, class_1_graphs = get_isomorphic_pairs( dataset_features["name"], GNNgraph_list, config["run"]["k_fold"], current_fold, interpretability_config["number_of_isomorphic_sample_pairs"]) # Generate attribution scores for the isomorphic pairs if class_0_graphs == None: pass elif len(class_0_graphs) == 0 or len(class_1_graphs) == 0: print("DeepLIFT: No isomorphic pairs found for test dataset") else: output_for_generating_saliency_map["deeplift_isomorphic_class_0"] = [] output_for_generating_saliency_map["deeplift_isomorphic_class_1"] = [] for graph_0, graph_1 in zip(class_0_graphs, class_1_graphs): node_feat_0, n2n, subg = graph_to_tensor( [graph_0], dataset_features["feat_dim"], dataset_features["edge_feat_dim"], cuda) node_feat_1, _, _ = graph_to_tensor( [graph_1], dataset_features["feat_dim"], dataset_features["edge_feat_dim"], cuda) attribution_0 = dl.attribute(node_feat_0, additional_forward_args=(n2n, subg, [graph_0]), baselines=node_feat_1, target=graph_0.label) attribution_1 = dl.attribute(node_feat_1, additional_forward_args=(n2n, subg, [graph_1]), baselines=node_feat_0, target=graph_1.label) attribution_score_0 = torch.sum(attribution_0, dim=1).tolist() attribution_score_1 = torch.sum(attribution_1, dim=1).tolist() attribution_score_0 = standardize_scores(attribution_score_0) attribution_score_1 = standardize_scores(attribution_score_1) output_for_generating_saliency_map["deeplift_isomorphic_class_0"].append( (graph_0, attribution_score_0)) output_for_generating_saliency_map["deeplift_isomorphic_class_1"].append( (graph_1, attribution_score_1)) return output_for_metrics_calculation, output_for_generating_saliency_map, execution_time
def LayerGradCAM(classifier_model, config, dataset_features,
                 GNNgraph_list, current_fold=None, cuda=0):
    '''
    Attribute to the input layer using soft assign
    :param classifier_model: trained classifier model
    :param config: parsed configuration file of config.yml
    :param dataset_features: a dictionary of dataset features obtained from load_data.py
    :param GNNgraph_list: a list of GNNgraphs obtained from the dataset
    :param current_fold: has no use in this method
    :param cuda: whether to use GPU to perform conversion to tensor
    '''
    # Initialise settings
    interpretability_config = config["interpretability_methods"]["LayerGradCAM"]
    assign_type = interpretability_config["assign_attribution"]

    # Perform Grad-CAM on the classifier model at a specific layer
    layer_idx = interpretability_config["layer"]
    if layer_idx == 0:
        gc = LayerGradCam(classifier_model, classifier_model.graph_convolution)
    else:
        gc = LayerGradCam(classifier_model,
                          classifier_model.conv_modules[layer_idx - 1])

    output_for_metrics_calculation = []
    output_for_generating_saliency_map = {}

    # Obtain attribution scores for use in qualitative metrics
    tmp_timing_list = []
    for GNNgraph in GNNgraph_list:
        output = {'graph': GNNgraph}
        for _, label in dataset_features["label_dict"].items():
            # Relabel all graphs just in case; may relabel only those that
            # need relabelling if performance is poor
            original_label = GNNgraph.label
            GNNgraph.label = label

            node_feat, n2n, subg = graph_to_tensor(
                [GNNgraph], dataset_features["feat_dim"],
                dataset_features["edge_feat_dim"], cuda)

            start_generation = perf_counter()
            attribution = gc.attribute(
                node_feat,
                additional_forward_args=(n2n, subg, [GNNgraph]),
                target=label,
                relu_attributions=True)

            # Attribute to the input layer using the assign method specified
            reverse_assign_tensor_list = []
            for i in range(1, layer_idx + 1):
                assign_tensor = classifier_model.cur_assign_tensor_list[i - 1]
                max_index = torch.argmax(assign_tensor, dim=1, keepdim=True)
                if assign_type == "hard":
                    # Hard assignment: one-hot matrix mapping each node to
                    # its highest-scoring cluster
                    reverse_assign_tensor = torch.transpose(
                        torch.zeros(assign_tensor.size()).scatter_(
                            1, max_index, value=1), 0, 1)
                else:
                    # Soft assignment: use the assignment weights directly
                    reverse_assign_tensor = torch.transpose(assign_tensor, 0, 1)
                reverse_assign_tensor_list.append(reverse_assign_tensor)

            # Propagate the layer attribution back to the input nodes
            attribution = torch.transpose(attribution, 0, 1)
            for reverse_tensor in reversed(reverse_assign_tensor_list):
                attribution = attribution @ reverse_tensor
            attribution = torch.transpose(attribution, 0, 1)

            tmp_timing_list.append(perf_counter() - start_generation)

            attribution_score = torch.sum(attribution, dim=1).tolist()
            attribution_score = standardize_scores(attribution_score)

            GNNgraph.label = original_label
            output[label] = attribution_score
        output_for_metrics_calculation.append(output)

    execution_time = sum(tmp_timing_list) / len(tmp_timing_list)

    # Obtain attribution scores for use in generating saliency maps
    if interpretability_config["sample_ids"] is not None:
        if ',' in str(interpretability_config["sample_ids"]):
            sample_graph_id_list = list(
                map(int, interpretability_config["sample_ids"].split(',')))
        else:
            sample_graph_id_list = [int(interpretability_config["sample_ids"])]

        output_for_generating_saliency_map.update(
            {"layergradcam_%s_%s" % (str(assign_type), str(label)): []
             for _, label in dataset_features["label_dict"].items()})

        for index in range(len(output_for_metrics_calculation)):
            tmp_output = output_for_metrics_calculation[index]
            tmp_label = tmp_output['graph'].label
            if tmp_output['graph'].graph_id in sample_graph_id_list:
                element_name = "layergradcam_%s_%s" % (str(assign_type), str(tmp_label))
"layergradcam_%s_%s" % (str(assign_type), str(tmp_label)) output_for_generating_saliency_map[element_name].append( (tmp_output['graph'], tmp_output[tmp_label])) elif interpretability_config["number_of_samples"] > 0: # Randomly sample from existing list: graph_idxes = list(range(len(output_for_metrics_calculation))) random.shuffle(graph_idxes) output_for_generating_saliency_map.update({ "layergradcam_%s_%s" % (str(assign_type), str(label)): [] for _, label in dataset_features["label_dict"].items() }) # Begin appending found samples for index in graph_idxes: tmp_label = output_for_metrics_calculation[index]['graph'].label element_name = "layergradcam_%s_%s" % (str(assign_type), str(tmp_label)) if len(output_for_generating_saliency_map[element_name] ) < interpretability_config["number_of_samples"]: output_for_generating_saliency_map[element_name].append( (output_for_metrics_calculation[index]['graph'], output_for_metrics_calculation[index][tmp_label])) return output_for_metrics_calculation, output_for_generating_saliency_map, execution_time
def saliency(classifier_model, config, dataset_features,
             GNNgraph_list, current_fold=None, cuda=0):
    '''
    :param classifier_model: trained classifier model
    :param config: parsed configuration file of config.yml
    :param dataset_features: a dictionary of dataset features obtained from load_data.py
    :param GNNgraph_list: a list of GNNgraphs obtained from the dataset
    :param current_fold: has no use in this method
    :param cuda: whether to use GPU to perform conversion to tensor
    '''
    # Initialise settings
    interpretability_config = config["interpretability_methods"]["saliency"]

    # Perform saliency on the classifier model
    sl = Saliency(classifier_model)

    output_for_metrics_calculation = []
    output_for_generating_saliency_map = {}

    # Obtain attribution scores for use in qualitative metrics
    tmp_timing_list = []
    for GNNgraph in GNNgraph_list:
        output = {'graph': GNNgraph}
        for _, label in dataset_features["label_dict"].items():
            # Relabel all graphs just in case; may relabel only those that
            # need relabelling if performance is poor
            original_label = GNNgraph.label
            GNNgraph.label = label

            node_feat, n2n, subg = graph_to_tensor(
                [GNNgraph], dataset_features["feat_dim"],
                dataset_features["edge_feat_dim"], cuda)

            start_generation = perf_counter()
            attribution = sl.attribute(
                node_feat,
                additional_forward_args=(n2n, subg, [GNNgraph]),
                target=label)
            tmp_timing_list.append(perf_counter() - start_generation)

            attribution_score = torch.sum(attribution, dim=1).tolist()
            attribution_score = standardize_scores(attribution_score)

            GNNgraph.label = original_label
            output[label] = attribution_score
        output_for_metrics_calculation.append(output)

    execution_time = sum(tmp_timing_list) / len(tmp_timing_list)

    # Obtain attribution scores for use in generating saliency maps
    if interpretability_config["sample_ids"] is not None:
        if ',' in str(interpretability_config["sample_ids"]):
            sample_graph_id_list = list(
                map(int, interpretability_config["sample_ids"].split(',')))
        else:
            sample_graph_id_list = [int(interpretability_config["sample_ids"])]

        output_for_generating_saliency_map.update(
            {"saliency_class_%s" % str(label): []
             for _, label in dataset_features["label_dict"].items()})

        for index in range(len(output_for_metrics_calculation)):
            tmp_output = output_for_metrics_calculation[index]
            tmp_label = tmp_output['graph'].label
            if tmp_output['graph'].graph_id in sample_graph_id_list:
                element_name = "saliency_class_%s" % str(tmp_label)
                output_for_generating_saliency_map[element_name].append(
                    (tmp_output['graph'], tmp_output[tmp_label]))

    elif interpretability_config["number_of_samples"] > 0:
        # Randomly sample from the existing list
        graph_idxes = list(range(len(output_for_metrics_calculation)))
        random.shuffle(graph_idxes)
        output_for_generating_saliency_map.update(
            {"saliency_class_%s" % str(label): []
             for _, label in dataset_features["label_dict"].items()})

        # Begin appending found samples
        for index in graph_idxes:
            tmp_label = output_for_metrics_calculation[index]['graph'].label
            element_name = "saliency_class_%s" % str(tmp_label)
            if len(output_for_generating_saliency_map[element_name]) < \
                    interpretability_config["number_of_samples"]:
                output_for_generating_saliency_map[element_name].append(
                    (output_for_metrics_calculation[index]['graph'],
                     output_for_metrics_calculation[index][tmp_label]))

    return output_for_metrics_calculation, output_for_generating_saliency_map, execution_time
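# Standalone sketch of the Captum Saliency call pattern used above, on a
# plain feed-forward model so it runs without the GNN plumbing. The model
# and input shapes are made up; only the attribute() usage mirrors this file.
def _demo_captum_saliency():
    import torch.nn as nn
    from captum.attr import Saliency

    model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
    model.eval()
    node_feat = torch.rand(3, 4, requires_grad=True)  # 3 "nodes", 4 features
    # Gradient-magnitude attribution with the same shape as the input
    attribution = Saliency(model).attribute(node_feat, target=1)
    # Collapse the feature dimension to one score per row, as done above
    return torch.sum(attribution, dim=1).tolist()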
def loop_dataset(g_list, classifier, sample_idxes, config,
                 dataset_features, optimizer=None):
    bsize = max(config["general"]["batch_size"], 1)
    total_loss = []
    total_iters = (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize
    pbar = tqdm(range(total_iters), unit='batch')
    all_targets = []
    all_scores = []
    n_samples = 0

    # Create a temporary timer dict to store timing data for this loop
    temp_timing_dict = {"forward": [], "backward": []}

    for pos in pbar:
        selected_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]
        batch_graph = [g_list[idx] for idx in selected_idx]
        targets = [g_list[idx].label for idx in selected_idx]
        all_targets += targets

        node_feat, n2n, subg = graph_to_tensor(
            batch_graph, dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cmd_args.cuda)

        # Get labels of all graphs in the batch
        labels = torch.LongTensor(len(batch_graph))
        for i in range(len(batch_graph)):
            labels[i] = batch_graph[i].label

        if cmd_args.cuda == 1:
            labels = labels.cuda()

        # Perform training
        start_forward = time.perf_counter()
        output = classifier(node_feat, n2n, subg, batch_graph)
        logits = F.log_softmax(output, dim=1)
        prob = F.softmax(logits, dim=1)

        # Calculate accuracy and loss
        loss = F.nll_loss(logits, labels)
        temp_timing_dict["forward"].append(time.perf_counter() - start_forward)
        pred = logits.data.max(1, keepdim=True)[1]
        acc = pred.eq(labels.data.view_as(pred)).cpu().sum().item() / \
            float(labels.size()[0])
        all_scores.append(prob.cpu().detach())  # for classification

        # Back propagation
        if optimizer is not None:
            start_backward = time.perf_counter()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            temp_timing_dict["backward"].append(
                time.perf_counter() - start_backward)

        loss = loss.data.cpu().detach().numpy()
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))
        total_loss.append(np.array([loss, acc]) * len(selected_idx))

        n_samples += len(selected_idx)

    if optimizer is None:
        assert n_samples == len(sample_idxes)

    total_loss = np.array(total_loss)
    avg_loss = np.sum(total_loss, 0) / n_samples
    roc_auc, prc_auc = auc_scores(all_targets, all_scores)
    avg_loss = np.concatenate((avg_loss, [roc_auc], [prc_auc]))

    # Append the loop average to the global timer tracking list.
    # Only for the training phase
    if optimizer is not None:
        timing_dict["forward"].append(
            sum(temp_timing_dict["forward"]) / len(temp_timing_dict["forward"]))
        timing_dict["backward"].append(
            sum(temp_timing_dict["backward"]) / len(temp_timing_dict["backward"]))

    return avg_loss
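# Hedged sketch of how loop_dataset is typically driven across epochs.
# `train_graphs`, `test_graphs`, `num_epochs`, and the Adam learning rate
# are hypothetical; the positional meaning of the returned vector follows
# the concatenation above: [loss, acc, roc_auc, prc_auc].
def _demo_training_loop(train_graphs, test_graphs, classifier, config,
                        dataset_features, num_epochs=100):
    import torch.optim as optim

    optimizer = optim.Adam(classifier.parameters(), lr=0.001)
    train_idxes = list(range(len(train_graphs)))
    test_idxes = list(range(len(test_graphs)))
    for epoch in range(num_epochs):
        classifier.train()
        train_metrics = loop_dataset(train_graphs, classifier, train_idxes,
                                     config, dataset_features, optimizer)
        classifier.eval()
        test_metrics = loop_dataset(test_graphs, classifier, test_idxes,
                                    config, dataset_features)
        print("epoch %d | train loss %.5f acc %.5f | test loss %.5f acc %.5f"
              % (epoch, train_metrics[0], train_metrics[1],
                 test_metrics[0], test_metrics[1]))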
def loop_dataset(g_list, classifier, sample_idxes, config,
                 dataset_features, optimizer=None):
    '''
    :param g_list: list of graphs to train over
    :param classifier: the initialised classifier
    :param sample_idxes: indexes marking the training and test graphs
    :param config: run configurations as stated in config.yml
    :param dataset_features: dataset features obtained from load_data.py
    :param optimizer: optimizer to use
    :return: average loss and other model performance metrics
    '''
    n_samples = 0
    all_targets = []
    all_scores = []
    total_loss = []

    # Determine batch size and the number of iterations
    bsize = max(config["general"]["batch_size"], 1)
    total_iters = (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize

    # Create a temporary timer dict to store timing data for this loop
    temp_timing_dict = {"forward": [], "backward": []}

    # For each batch
    for pos in range(total_iters):
        selected_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]
        batch_graph = [g_list[idx] for idx in selected_idx]
        targets = [g_list[idx].label for idx in selected_idx]
        all_targets += targets

        node_feat, n2n, subg = graph_to_tensor(
            batch_graph, dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cmd_args.cuda)

        # Get graph labels of all graphs in the batch
        labels = torch.LongTensor(len(batch_graph))
        for i in range(len(batch_graph)):
            labels[i] = batch_graph[i].label

        if cmd_args.cuda == '1':
            labels = labels.cuda()

        # Perform the forward pass
        start_forward = time.perf_counter()
        output = classifier(node_feat, n2n, subg, batch_graph)
        temp_timing_dict["forward"].append(time.perf_counter() - start_forward)
        logits = F.log_softmax(output, dim=1)
        prob = F.softmax(logits, dim=1)

        # Calculate accuracy and loss
        loss = classifier.loss(logits, labels)
        pred = logits.data.max(1, keepdim=True)[1]
        acc = pred.eq(labels.data.view_as(pred)).cpu().sum().item() / \
            float(labels.size()[0])
        all_scores.append(prob.cpu().detach())  # for classification

        # Back propagate loss
        if optimizer is not None:
            optimizer.zero_grad()
            start_backward = time.perf_counter()
            loss.backward()
            temp_timing_dict["backward"].append(
                time.perf_counter() - start_backward)
            optimizer.step()

        loss = loss.data.cpu().detach().numpy()
        total_loss.append(np.array([loss, acc]) * len(selected_idx))
        n_samples += len(selected_idx)

    if optimizer is None:
        assert n_samples == len(sample_idxes)

    # Calculate average loss and report performance metrics
    total_loss = np.array(total_loss)
    avg_loss = np.sum(total_loss, 0) / n_samples
    roc_auc, prc_auc = auc_scores(all_targets, all_scores)
    avg_loss = np.concatenate((avg_loss, [roc_auc], [prc_auc]))
    # Append the loop average to the global timer tracking list.
    # Only for the training phase
    if optimizer is not None:
        timing_dict["forward"].append(
            sum(temp_timing_dict["forward"]) / len(temp_timing_dict["forward"]))
        timing_dict["backward"].append(
            sum(temp_timing_dict["backward"]) / len(temp_timing_dict["backward"]))

    return avg_loss
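# Worked example of the total_iters arithmetic used in both loop_dataset
# variants: with optimizer=None (evaluation) the expression is a ceiling
# division, so every sample is visited and the n_samples assertion holds;
# with an optimizer (training) it is a floor division, so a ragged final
# batch is dropped. The sample counts below are made up for illustration.
def _demo_total_iters():
    n, bsize = 10, 3
    eval_iters = (n + (bsize - 1) * True) // bsize    # ceil(10 / 3) == 4
    train_iters = (n + (bsize - 1) * False) // bsize  # floor(10 / 3) == 3
    return eval_iters, train_iters                    # (4, 3)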