# Imports assumed by the snippets below. Project-level helpers such as
# graph_to_tensor, get_isomorphic_pairs, standardize_scores, auc_scores,
# cmd_args and timing_dict come from the originating codebase and are not
# shown in this listing.
import random
import time
from time import perf_counter

import numpy as np
import torch
import torch.nn.functional as F
from captum.attr import DeepLift, LayerGradCam, Saliency
from sklearn import metrics
from tqdm import tqdm


def get_roc_auc(trained_classifier_model, GNNgraph_list, dataset_features, cuda):
	trained_classifier_model.eval()
	score_list = []
	target_list = []

	if dataset_features["num_class"] > 2:
		print("Unable to calculate fidelity for multiclass datset")
		return 0

	# Instead of sending the whole list as one batch, process graphs
	# one by one in case the classifier does not support batch processing
	# TODO: Enable batch processing support
	for GNNgraph in GNNgraph_list:
		node_feat, n2n, subg = graph_to_tensor(
            [GNNgraph], dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cuda)

		output = trained_classifier_model(node_feat, n2n, subg, [GNNgraph])
		logits = F.log_softmax(output, dim=1)
		prob = F.softmax(logits, dim=1)  # softmax of log-probabilities recovers the probabilities

		score_list.append(prob.cpu().detach())
		target_list.append(GNNgraph.label)

	score_list = torch.cat(score_list).cpu().numpy()
	score_list = score_list[:, 1]

	roc_auc = metrics.roc_auc_score(
		target_list, score_list, average='macro')

	return roc_auc
def get_accuracy(trained_classifier_model, GNNgraph_list, dataset_features, cuda):
	trained_classifier_model.eval()
	true_equal_pred_pairs = []

	# Instead of sending the whole list as one batch, process graphs
	# one by one in case the classifier does not support batch processing
	# TODO: Enable batch processing support
	for GNNgraph in GNNgraph_list:
		node_feat, n2n, subg = graph_to_tensor(
            [GNNgraph], dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cuda)

		output = trained_classifier_model(node_feat, n2n, subg, [GNNgraph])
		logits = F.log_softmax(output, dim=1)
		pred = logits.data.max(1, keepdim=True)[1]

		if GNNgraph.label == int(pred[0]):
			true_equal_pred_pairs.append(1)
		else:
			true_equal_pred_pairs.append(0)

	return sum(true_equal_pred_pairs)/len(true_equal_pred_pairs)
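# The two helpers above process one graph at a time because the classifier
# may not support batching (see the TODOs). A minimal sketch of a batched
# variant, assuming graph_to_tensor and the classifier accept the whole
# graph list at once, as they do in loop_dataset further below.
# get_accuracy_batched is a hypothetical name, not part of the original code.
def get_accuracy_batched(trained_classifier_model, GNNgraph_list, dataset_features, cuda):
	trained_classifier_model.eval()

	# Tensorise the whole list in a single call
	node_feat, n2n, subg = graph_to_tensor(
		GNNgraph_list, dataset_features["feat_dim"],
		dataset_features["edge_feat_dim"], cuda)

	with torch.no_grad():
		output = trained_classifier_model(node_feat, n2n, subg, GNNgraph_list)

	pred = output.max(1)[1].cpu()
	labels = torch.LongTensor([g.label for g in GNNgraph_list])
	return pred.eq(labels).sum().item() / float(len(GNNgraph_list))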
def DeepLIFT(classifier_model, config, dataset_features, GNNgraph_list, current_fold, cuda=0):
	'''
		:param classifier_model: trained classifier model
		:param config: parsed configuration file of config.yml
		:param dataset_features: a dictionary of dataset features obtained from load_data.py
		:param GNNgraph_list: a list of GNNgraphs obtained from the dataset
		:param current_fold: current fold number, used when retrieving isomorphic sample pairs
		:param cuda: whether to use GPU to perform conversion to tensor
	'''
	# Initialise settings
	interpretability_config = config["interpretability_methods"]["DeepLIFT"]

	# Perform deeplift on the classifier model
	dl = DeepLift(classifier_model)

	output_for_metrics_calculation = []
	output_for_generating_saliency_map = {}

	# Obtain attribution score for use in qualitative metrics
	tmp_timing_list = []

	for GNNgraph in GNNgraph_list:
		output = {'graph': GNNgraph}
		for _, label in dataset_features["label_dict"].items():
			# Relabel all graphs just in case; relabel only those that need
			# relabelling if performance becomes an issue
			original_label = GNNgraph.label
			GNNgraph.label = label

			node_feat, n2n, subg = graph_to_tensor(
				[GNNgraph], dataset_features["feat_dim"],
				dataset_features["edge_feat_dim"], cuda)

			start_generation = perf_counter()
			attribution = dl.attribute(node_feat,
								   additional_forward_args=(n2n, subg, [GNNgraph]),
								   target=label)
			tmp_timing_list.append(perf_counter() - start_generation)
			attribution_score = torch.sum(attribution, dim=1).tolist()
			attribution_score = standardize_scores(attribution_score)

			GNNgraph.label = original_label

			output[label] = attribution_score
		output_for_metrics_calculation.append(output)

	execution_time = sum(tmp_timing_list)/(len(tmp_timing_list))

	# Obtain attribution score for use in generating saliency map for comparison with zero tensors
	if interpretability_config["compare_with_zero_tensor"] is True:
		if interpretability_config["sample_ids"] is not None:
			if ',' in str(interpretability_config["sample_ids"]):
				sample_graph_id_list = list(map(int, interpretability_config["sample_ids"].split(',')))
			else:
				sample_graph_id_list = [int(interpretability_config["sample_ids"])]

			output_for_generating_saliency_map.update({"layergradcam_%s_%s" % (str(assign_type), str(label)): []
													   for _, label in dataset_features["label_dict"].items()})

			for index in range(len(output_for_metrics_calculation)):
				tmp_output = output_for_metrics_calculation[index]
				tmp_label = tmp_output['graph'].label
				if tmp_output['graph'].graph_id in sample_graph_id_list:
					element_name = "layergradcam_%s_%s" % (str(assign_type), str(tmp_label))
					output_for_generating_saliency_map[element_name].append(
						(tmp_output['graph'], tmp_output[tmp_label]))

		elif interpretability_config["number_of_zero_tensor_samples"] > 0:
			# Randomly sample from existing list:
			graph_idxes = list(range(len(output_for_metrics_calculation)))
			random.shuffle(graph_idxes)
			output_for_generating_saliency_map.update({"deeplift_zero_tensor_class_%s" % str(label): []
													   for _, label in dataset_features["label_dict"].items()})

			# Begin appending found samples
			for index in graph_idxes:
				tmp_label = output_for_metrics_calculation[index]['graph'].label
				element_name = "deeplift_zero_tensor_class_%s" % str(tmp_label)
				if len(output_for_generating_saliency_map[element_name]) < interpretability_config["number_of_zero_tensor_samples"]:
					output_for_generating_saliency_map[element_name].append(
						(output_for_metrics_calculation[index]['graph'], output_for_metrics_calculation[index][tmp_label]))

	# Obtain attribution score for use in generating saliency map for comparison with isomers
	if interpretability_config["compare_with_isomorphic_samples"] is True:
		if dataset_features["num_class"] != 2:
			print("DeepLIFT.py: Comparing with isomorphic samples is only possible in binary classification tasks.")
		else:
			# Get all isomorphic pairs
			class_0_graphs, class_1_graphs = get_isomorphic_pairs(
				dataset_features["name"], GNNgraph_list, config["run"]["k_fold"], current_fold,
				interpretability_config["number_of_isomorphic_sample_pairs"])

			# Generate attribution scores for the isomorphic pairs
			if class_0_graphs is None:
				pass
			elif len(class_0_graphs) == 0 or len(class_1_graphs) == 0:
				print("DeepLIFT: No isomorphic pairs found for test dataset")
			else:
				output_for_generating_saliency_map["deeplift_isomorphic_class_0"] = []
				output_for_generating_saliency_map["deeplift_isomorphic_class_1"] = []

				for graph_0, graph_1 in zip(class_0_graphs, class_1_graphs):
					# The pair is isomorphic, so the adjacency (n2n) and
					# subgraph pooling (subg) tensors built from graph_0 are
					# reused for graph_1 below
					node_feat_0, n2n, subg = graph_to_tensor(
						[graph_0], dataset_features["feat_dim"],
						dataset_features["edge_feat_dim"], cuda)

					node_feat_1, _, _ = graph_to_tensor(
						[graph_1], dataset_features["feat_dim"],
						dataset_features["edge_feat_dim"], cuda)

					attribution_0 = dl.attribute(node_feat_0,
						additional_forward_args=(n2n, subg, [graph_0]),
						baselines=node_feat_1,
						target=graph_0.label)

					attribution_1 = dl.attribute(node_feat_1,
						additional_forward_args=(n2n, subg, [graph_1]),
						baselines=node_feat_0,
						target=graph_1.label)

					attribution_score_0 = torch.sum(attribution_0, dim=1).tolist()
					attribution_score_1 = torch.sum(attribution_1, dim=1).tolist()

					attribution_score_0 = standardize_scores(attribution_score_0)
					attribution_score_1 = standardize_scores(attribution_score_1)

					output_for_generating_saliency_map["deeplift_isomorphic_class_0"].append(
						(graph_0, attribution_score_0))
					output_for_generating_saliency_map["deeplift_isomorphic_class_1"].append(
						(graph_1, attribution_score_1))

	return output_for_metrics_calculation, output_for_generating_saliency_map, execution_time
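# standardize_scores is used throughout but not defined in this listing.
# A plausible sketch, assuming a z-score normalisation of the per-node
# attribution scores; the original implementation may differ.
def standardize_scores(score_list):
	scores = np.array(score_list, dtype=np.float64)
	std = scores.std()
	if std == 0:
		return [0.0] * len(score_list)
	return ((scores - scores.mean()) / std).tolist()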
def LayerGradCAM(classifier_model,
                 config,
                 dataset_features,
                 GNNgraph_list,
                 current_fold=None,
                 cuda=0):
    '''
        Attribute to the input layer using the configured assign method
        (hard or soft)
        :param classifier_model: trained classifier model
        :param config: parsed configuration file of config.yml
        :param dataset_features: a dictionary of dataset features obtained from load_data.py
        :param GNNgraph_list: a list of GNNgraphs obtained from the dataset
        :param current_fold: not used in this method
        :param cuda: whether to use GPU to perform conversion to tensor
    '''
    # Initialise settings
    interpretability_config = config["interpretability_methods"][
        "LayerGradCAM"]
    assign_type = interpretability_config["assign_attribution"]

    # Perform grad cam on the classifier model and on a specific layer
    layer_idx = interpretability_config["layer"]
    if layer_idx == 0:
        gc = LayerGradCam(classifier_model, classifier_model.graph_convolution)
    else:
        gc = LayerGradCam(classifier_model,
                          classifier_model.conv_modules[layer_idx - 1])

    output_for_metrics_calculation = []
    output_for_generating_saliency_map = {}

    # Obtain attribution score for use in qualitative metrics
    tmp_timing_list = []

    for GNNgraph in GNNgraph_list:
        output = {'graph': GNNgraph}
        for _, label in dataset_features["label_dict"].items():
            # Relabel all graphs just in case; relabel only those that need
            # relabelling if performance becomes an issue
            original_label = GNNgraph.label
            GNNgraph.label = label

            node_feat, n2n, subg = graph_to_tensor(
                [GNNgraph], dataset_features["feat_dim"],
                dataset_features["edge_feat_dim"], cuda)

            start_generation = perf_counter()

            attribution = gc.attribute(node_feat,
                                       additional_forward_args=(n2n, subg,
                                                                [GNNgraph]),
                                       target=label,
                                       relu_attributions=True)

            # Attribute to the input layer using the assign method specified
            reverse_assign_tensor_list = []
            for i in range(1, layer_idx + 1):
                assign_tensor = classifier_model.cur_assign_tensor_list[i - 1]
                max_index = torch.argmax(assign_tensor, dim=1, keepdim=True)
                if assign_type == "hard":
                    reverse_assign_tensor = torch.transpose(
                        torch.zeros(assign_tensor.size()).scatter_(1,
                                                                   max_index,
                                                                   value=1), 0,
                        1)
                else:
                    reverse_assign_tensor = torch.transpose(
                        assign_tensor, 0, 1)

                reverse_assign_tensor_list.append(reverse_assign_tensor)

            attribution = torch.transpose(attribution, 0, 1)

            for reverse_tensor in reversed(reverse_assign_tensor_list):
                attribution = attribution @ reverse_tensor

            attribution = torch.transpose(attribution, 0, 1)
            tmp_timing_list.append(perf_counter() - start_generation)

            attribution_score = torch.sum(attribution, dim=1).tolist()
            attribution_score = standardize_scores(attribution_score)

            GNNgraph.label = original_label

            output[label] = attribution_score
        output_for_metrics_calculation.append(output)

    execution_time = sum(tmp_timing_list) / (len(tmp_timing_list))

    # Obtain attribution scores for the samples used to generate saliency maps
    if interpretability_config["sample_ids"] is not None:
        if ',' in str(interpretability_config["sample_ids"]):
            sample_graph_id_list = list(
                map(int, interpretability_config["sample_ids"].split(',')))
        else:
            sample_graph_id_list = [int(interpretability_config["sample_ids"])]

        output_for_generating_saliency_map.update({
            "layergradcam_%s_%s" % (str(assign_type), str(label)): []
            for _, label in dataset_features["label_dict"].items()
        })

        for index in range(len(output_for_metrics_calculation)):
            tmp_output = output_for_metrics_calculation[index]
            tmp_label = tmp_output['graph'].label
            if tmp_output['graph'].graph_id in sample_graph_id_list:
                element_name = "layergradcam_%s_%s" % (str(assign_type),
                                                       str(tmp_label))
                output_for_generating_saliency_map[element_name].append(
                    (tmp_output['graph'], tmp_output[tmp_label]))

    elif interpretability_config["number_of_samples"] > 0:
        # Randomly sample from existing list:
        graph_idxes = list(range(len(output_for_metrics_calculation)))
        random.shuffle(graph_idxes)
        output_for_generating_saliency_map.update({
            "layergradcam_%s_%s" % (str(assign_type), str(label)): []
            for _, label in dataset_features["label_dict"].items()
        })

        # Begin appending found samples
        for index in graph_idxes:
            tmp_label = output_for_metrics_calculation[index]['graph'].label
            element_name = "layergradcam_%s_%s" % (str(assign_type),
                                                   str(tmp_label))
            if len(output_for_generating_saliency_map[element_name]
                   ) < interpretability_config["number_of_samples"]:
                output_for_generating_saliency_map[element_name].append(
                    (output_for_metrics_calculation[index]['graph'],
                     output_for_metrics_calculation[index][tmp_label]))

    return output_for_metrics_calculation, output_for_generating_saliency_map, execution_time
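# Toy illustration of the hard-assign back-projection performed inside
# LayerGradCAM above: attributions computed over 2 clusters are copied back
# to 4 original nodes according to each node's most likely cluster.
# All tensors here are made-up examples, not values from the pipeline.
def _hard_assign_backprojection_demo():
    assign_tensor = torch.tensor([[0.9, 0.1],   # node 0 -> cluster 0
                                  [0.8, 0.2],   # node 1 -> cluster 0
                                  [0.3, 0.7],   # node 2 -> cluster 1
                                  [0.1, 0.9]])  # node 3 -> cluster 1

    max_index = torch.argmax(assign_tensor, dim=1, keepdim=True)
    hard = torch.zeros(assign_tensor.size()).scatter_(1, max_index, value=1)
    reverse_assign_tensor = torch.transpose(hard, 0, 1)  # (clusters, nodes)

    cluster_attribution = torch.tensor([[2.0], [5.0]])   # (clusters, 1 channel)
    node_attribution = torch.transpose(
        torch.transpose(cluster_attribution, 0, 1) @ reverse_assign_tensor,
        0, 1)
    # node_attribution == [[2.], [2.], [5.], [5.]]: each node inherits the
    # attribution of the cluster it was assigned to.
    return node_attribution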
def saliency(classifier_model,
             config,
             dataset_features,
             GNNgraph_list,
             current_fold=None,
             cuda=0):
    '''
        :param classifier_model: trained classifier model
        :param config: parsed configuration file of config.yml
        :param dataset_features: a dictionary of dataset features obtained from load_data.py
        :param GNNgraph_list: a list of GNNgraphs obtained from the dataset
        :param current_fold: not used in this method
        :param cuda: whether to use GPU to perform conversion to tensor
    '''
    # Initialise settings
    interpretability_config = config["interpretability_methods"]["saliency"]

    # Perform Saliency on the classifier model
    sl = Saliency(classifier_model)

    output_for_metrics_calculation = []
    output_for_generating_saliency_map = {}

    # Obtain attribution score for use in qualitative metrics
    tmp_timing_list = []

    for GNNgraph in GNNgraph_list:
        output = {'graph': GNNgraph}
        for _, label in dataset_features["label_dict"].items():
            # Relabel all graphs just in case; relabel only those that need
            # relabelling if performance becomes an issue
            original_label = GNNgraph.label
            GNNgraph.label = label

            node_feat, n2n, subg = graph_to_tensor(
                [GNNgraph], dataset_features["feat_dim"],
                dataset_features["edge_feat_dim"], cuda)

            start_generation = perf_counter()
            attribution = sl.attribute(node_feat,
                                       additional_forward_args=(n2n, subg,
                                                                [GNNgraph]),
                                       target=label)

            tmp_timing_list.append(perf_counter() - start_generation)
            attribution_score = torch.sum(attribution, dim=1).tolist()
            attribution_score = standardize_scores(attribution_score)

            GNNgraph.label = original_label

            output[label] = attribution_score
        output_for_metrics_calculation.append(output)

    execution_time = sum(tmp_timing_list) / (len(tmp_timing_list))

    # Obtain attribution scores for the samples used to generate saliency maps
    if interpretability_config["sample_ids"] is not None:
        if ',' in str(interpretability_config["sample_ids"]):
            sample_graph_id_list = list(
                map(int, interpretability_config["sample_ids"].split(',')))
        else:
            sample_graph_id_list = [int(interpretability_config["sample_ids"])]

        output_for_generating_saliency_map.update({
            "saliency_class_%s" % str(label): []
            for _, label in dataset_features["label_dict"].items()
        })

        for index in range(len(output_for_metrics_calculation)):
            tmp_output = output_for_metrics_calculation[index]
            tmp_label = tmp_output['graph'].label
            if tmp_output['graph'].graph_id in sample_graph_id_list:
                element_name = "layergradcam_%s_%s" % (str(assign_type),
                                                       str(tmp_label))
                output_for_generating_saliency_map[element_name].append(
                    (tmp_output['graph'], tmp_output[tmp_label]))

    elif interpretability_config["number_of_samples"] > 0:
        # Randomly sample from existing list:
        graph_idxes = list(range(len(output_for_metrics_calculation)))
        random.shuffle(graph_idxes)
        output_for_generating_saliency_map.update({
            "saliency_class_%s" % str(label): []
            for _, label in dataset_features["label_dict"].items()
        })

        # Begin appending found samples
        for index in graph_idxes:
            tmp_label = output_for_metrics_calculation[index]['graph'].label
            element_name = "saliency_class_%s" % str(tmp_label)
            if len(output_for_generating_saliency_map[element_name]) < \
                    interpretability_config["number_of_samples"]:
                output_for_generating_saliency_map[element_name].append(
                    (output_for_metrics_calculation[index]['graph'],
                     output_for_metrics_calculation[index][tmp_label]))

    return output_for_metrics_calculation, output_for_generating_saliency_map, execution_time
def loop_dataset(g_list,
                 classifier,
                 sample_idxes,
                 config,
                 dataset_features,
                 optimizer=None):
    bsize = max(config["general"]["batch_size"], 1)

    total_loss = []
    total_iters = (len(sample_idxes) + (bsize - 1) *
                   (optimizer is None)) // bsize
    pbar = tqdm(range(total_iters), unit='batch')
    all_targets = []
    all_scores = []

    n_samples = 0

    # Create temporary timer dict to store timing data for this loop
    temp_timing_dict = {"forward": [], "backward": []}

    for pos in pbar:
        selected_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]

        batch_graph = [g_list[idx] for idx in selected_idx]
        targets = [g_list[idx].label for idx in selected_idx]
        all_targets += targets

        node_feat, n2n, subg = graph_to_tensor(
            batch_graph, dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cmd_args.cuda)

        # Get Labels
        labels = torch.LongTensor(len(batch_graph))

        for i in range(len(batch_graph)):
            labels[i] = batch_graph[i].label

        if cmd_args.cuda == 1:
            labels = labels.cuda()

        # Perform forward pass and time it
        start_forward = time.perf_counter()
        output = classifier(node_feat, n2n, subg, batch_graph)
        temp_timing_dict["forward"].append(time.perf_counter() - start_forward)
        logits = F.log_softmax(output, dim=1)
        prob = F.softmax(logits, dim=1)

        # Calculate accuracy and loss
        loss = F.nll_loss(logits, labels)
        pred = logits.data.max(1, keepdim=True)[1]
        acc = pred.eq(labels.data.view_as(pred)).cpu().sum().item() / float(
            labels.size()[0])
        all_scores.append(prob.cpu().detach())  # for classification

        # Back propagation
        if optimizer is not None:
            start_backward = time.perf_counter()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            temp_timing_dict["backward"].append(time.perf_counter() -
                                                start_backward)

        loss = loss.data.cpu().detach().numpy()
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))
        total_loss.append(np.array([loss, acc]) * len(selected_idx))

        n_samples += len(selected_idx)
    if optimizer is None:
        assert n_samples == len(sample_idxes)
    total_loss = np.array(total_loss)
    avg_loss = np.sum(total_loss, 0) / n_samples

    roc_auc, prc_auc = auc_scores(all_targets, all_scores)
    avg_loss = np.concatenate((avg_loss, [roc_auc], [prc_auc]))

    # Append loop average to global timer tracking list. Only for training phase
    if optimizer is not None:
        timing_dict["forward"].append(
            sum(temp_timing_dict["forward"]) /
            len(temp_timing_dict["forward"]))
        timing_dict["backward"].append(
            sum(temp_timing_dict["backward"]) /
            len(temp_timing_dict["backward"]))

    return avg_loss
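# auc_scores is used above but not defined in this listing. A plausible
# sketch, assuming binary classification and that all_scores holds per-batch
# probability tensors; the original implementation may differ.
def auc_scores(all_targets, all_scores):
    scores = torch.cat(all_scores).cpu().numpy()[:, 1]  # P(class 1)
    roc_auc = metrics.roc_auc_score(all_targets, scores, average='macro')
    prc_auc = metrics.average_precision_score(all_targets, scores)
    return roc_auc, prc_auc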
def loop_dataset(g_list,
                 classifier,
                 sample_idxes,
                 config,
                 dataset_features,
                 optimizer=None):
    '''
    :param g_list: list of graphs to train over
    :param classifier: the initialised classifier
    :param sample_idxes: indexes to mark the training and test graphs
    :param config: run configurations as stated in config.yml
    :param dataset_features: dataset features obtained from load_data.py
    :param optimizer: optimizer to use; None switches the loop to evaluation
    :return: average loss and other model performance metrics
    '''

    n_samples = 0
    all_targets = []
    all_scores = []
    total_loss = []

    # Determine batch size and the number of batches to iterate over
    bsize = max(config["general"]["batch_size"], 1)
    total_iters = (len(sample_idxes) + (bsize - 1) *
                   (optimizer is None)) // bsize

    # Create temporary timer dict to store timing data for this loop
    temp_timing_dict = {"forward": [], "backward": []}

    # For each batch
    for pos in range(total_iters):
        selected_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]

        batch_graph = [g_list[idx] for idx in selected_idx]
        targets = [g_list[idx].label for idx in selected_idx]
        all_targets += targets

        node_feat, n2n, subg = graph_to_tensor(
            batch_graph, dataset_features["feat_dim"],
            dataset_features["edge_feat_dim"], cmd_args.cuda)

        # Get graph labels of all graphs in batch
        labels = torch.LongTensor(len(batch_graph))

        for i in range(len(batch_graph)):
            labels[i] = batch_graph[i].label

        if cmd_args.cuda == '1':
            labels = labels.cuda()

        # Perform forward pass and time it
        start_forward = time.perf_counter()
        output = classifier(node_feat, n2n, subg, batch_graph)
        temp_timing_dict["forward"].append(time.perf_counter() - start_forward)
        logits = F.log_softmax(output, dim=1)
        prob = F.softmax(logits, dim=1)

        # Calculate accuracy and loss
        loss = classifier.loss(logits, labels)
        pred = logits.data.max(1, keepdim=True)[1]
        acc = pred.eq(labels.data.view_as(pred)).cpu().sum().item() /\
           float(labels.size()[0])
        all_scores.append(prob.cpu().detach())  # for classification

        # Back propagate loss
        if optimizer is not None:
            optimizer.zero_grad()
            start_backward = time.perf_counter()
            loss.backward()
            temp_timing_dict["backward"].append(time.perf_counter() -
                                                start_backward)
            optimizer.step()

        loss = loss.data.cpu().detach().numpy()
        total_loss.append(np.array([loss, acc]) * len(selected_idx))

        n_samples += len(selected_idx)

    if optimizer is None:
        assert n_samples == len(sample_idxes)

    # Calculate average loss and report performance metrics
    total_loss = np.array(total_loss)
    avg_loss = np.sum(total_loss, 0) / n_samples
    roc_auc, prc_auc = auc_scores(all_targets, all_scores)
    avg_loss = np.concatenate((avg_loss, [roc_auc], [prc_auc]))

    # Append loop average to global timer tracking list.
    # Only for training phase
    if optimizer is not None:
        timing_dict["forward"].append(
            sum(temp_timing_dict["forward"]) /
            len(temp_timing_dict["forward"]))
        timing_dict["backward"].append(
            sum(temp_timing_dict["backward"]) /
            len(temp_timing_dict["backward"]))

    return avg_loss
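# Minimal usage sketch of loop_dataset (hypothetical variable names: the
# graph lists, classifier, optimizer and config are assumed to come from the
# surrounding training script). Passing optimizer=None disables
# back-propagation and turns the loop into pure evaluation.
train_idxes = list(range(len(train_graphs)))
random.shuffle(train_idxes)
train_metrics = loop_dataset(train_graphs, classifier, train_idxes,
                             config, dataset_features, optimizer=optimizer)
test_metrics = loop_dataset(test_graphs, classifier,
                            list(range(len(test_graphs))),
                            config, dataset_features, optimizer=None)
# Each result is an array: [avg_loss, avg_accuracy, roc_auc, prc_auc]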