# Example #1
def checkNodeClassification(attack, dataset: torch_geometric.data.Data, attacked_node: torch.Tensor, y_target: torch.Tensor,
                            print_answer: Print, attack_num):
    """
        checks whether the node is currently classified to y_target

        Parameters
        ----------
        attack: oneGNNAttack
        dataset: torch_geometric.data.Data
        attacked_node: torch.Tensor - the victim node
        y_target: torch.Tensor - the target labels of the attack
        print_answer: Print - the type of print
        attack_num: int - the index of the node (out of the train/val/test-set)

        Returns
        -------
        classified_to_target: bool - whether the node is currently classified to y_target
    """
    results = test(dataset.data, attack.model_wrapper.model, attack.targeted, attacked_node, y_target)
    classified_to_target = not results[3]

    if not classified_to_target and print_answer is Print.YES:
        attack_log = 'Attack: {:03d}, Node: {}, Misclassified already!\n' \
            .format(attack_num, attacked_node.item())
        if attack.mode.isAdversarial():
            attack_log = 'Adv Epoch: {:03d}, '.format(attack.idx) + attack_log
        print(attack_log, flush=True)
    return classified_to_target
def adversarialTrainer(attack):
    """
        trains the model adversarially (the model learns to correctly classify harmful feature matrices)
        
        Parameters
        ----------
        attack: oneGNNAttack
        
        Returns
        -------
        model: Model
        model_log: str
        test_accuracy: torch.Tensor
    """

    model = attack.model_wrapper.model  # important note: this is a fresh, untrained model!
    data = attack.getDataset().data

    patience_counter, best_val_accuracy = 0, 0
    adversarial_model_train_epochs = 200
    log_template = 'Adversarial Model - Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}, Attack: {:.4f}'

    model.attack = True
    # train in an adversarial way
    for epoch in range(0, adversarial_model_train_epochs):
        tmp_attack = copy.deepcopy(attack)
        tmp_attack.setIdx(epoch + 1)
        attacked_x, attacked_nodes, y_targets = \
            getTheMostHarmfulInput(attack=tmp_attack, approach=NodeApproach.TOPOLOGY)

        train(model=attack.model_wrapper.model,
              optimizer=attack.model_wrapper.optimizer,
              data=data,
              attacked_nodes=attacked_nodes,
              attacked_x=attacked_x)
        train_results = test(data=data,
                             model=attack.model_wrapper.model,
                             targeted=attack.targeted,
                             attacked_nodes=attacked_nodes,
                             y_targets=y_targets)
        print(log_template.format(epoch + 1, *train_results))

        # patience
        val_acc = train_results[1]
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            patience_counter = 0
        else:
            patience_counter += 1
        if patience_counter >= attack.patience:
            break

    attack.model_wrapper.model.attack = False
    print()
    model_log = 'Adversarial Model - Train: {:.4f}, Val: {:.4f}, Test: {:.4f}, Attack: {:.4f}'\
        .format(*train_results)
    return attack.model_wrapper.model, model_log, train_results[2]
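# --- Illustrative sketch (not part of the repository) ---
# The adversarial loop above stops early via a simple patience counter keyed to
# validation accuracy. A minimal, standalone replay of just that pattern with a
# made-up validation curve; `early_stop_demo` is a hypothetical helper.
def early_stop_demo(val_accuracies, patience=3):
    best_val_accuracy, patience_counter = 0.0, 0
    for epoch, val_acc in enumerate(val_accuracies, start=1):
        if val_acc > best_val_accuracy:
            best_val_accuracy, patience_counter = val_acc, 0
        else:
            patience_counter += 1
        if patience_counter >= patience:
            return epoch  # stop: no validation improvement for `patience` epochs
    return len(val_accuracies)


# stops at epoch 5: epochs 3-5 show no improvement over the 0.71 reached at epoch 2
print(early_stop_demo([0.60, 0.71, 0.70, 0.69, 0.68]))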
# Example #3
def findMinimalEdges(sorted_edges: torch.Tensor, data, model, targeted: bool,
                     attacked_node: torch.Tensor, y_target: torch.Tensor,
                     node_num: int, print_flag: bool, log_template,
                     end_log_template):
    """
        flips each edge with a non-zero gradient, one at a time, until the attack succeeds
        this function is only available for multi approaches

        Parameters
        ----------
        sorted_edges: torch.Tensor - non-zero gradient edges, sorted by decreasing gradient
        data: torch_geometric.data.Data.data
        model: Model
        targeted: bool
        attacked_node: torch.Tensor - the victim node
        y_target: torch.Tensor - the target label of the attack
        node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
        print_flag: bool - whether to print every iteration or not
        log_template: str - prefix of the log format
        end_log_template: str - suffix of the log format

        Returns
        -------
        attack_results: torch.Tensor
    """
    for edge_num, malicious_edge in enumerate(sorted_edges):
        model.edge_weight.data[malicious_edge] = \
            not model.edge_weight.data[malicious_edge]
        attack_results = test(data=data,
                              model=model,
                              targeted=targeted,
                              attacked_nodes=attacked_node,
                              y_targets=y_target)
        if print_flag:
            print(log_template.format(node_num, edge_num + 2,
                                      *attack_results[:-1]),
                  flush=True,
                  end='')
        if attack_results[3]:
            break
    if print_flag:
        print(end_log_template.format(attack_results[-1]) + '\n', flush=True)
    return attack_results
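# --- Illustrative sketch (not part of the repository) ---
# findMinimalEdges implements a greedy loop: flip the next candidate edge,
# re-evaluate, stop on the first success. A toy, self-contained version of that
# pattern; `is_attack_successful` stands in for the real test() call.
import torch

def greedy_flip_demo(edge_weight, sorted_edges, is_attack_successful):
    for malicious_edge in sorted_edges:
        edge_weight[malicious_edge] = 1.0 - edge_weight[malicious_edge]  # flip 0 <-> 1
        if is_attack_successful(edge_weight):
            break
    return edge_weight


# toy data: 5 edges, the "attack" succeeds once edge 3 is switched on
weights = torch.tensor([1., 0., 1., 0., 0.])
candidates = torch.tensor([3, 1, 4])
print(greedy_flip_demo(weights, candidates, lambda w: bool(w[3] == 1)))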
# Example #4
def attackTrainerDiscrete(attack, attacked_nodes: torch.Tensor,
                          y_targets: torch.Tensor,
                          malicious_nodes: torch.Tensor, node_num: int,
                          discrete_stop_after_1iter: bool) -> torch.Tensor:
    """
        a trainer function that attacks our model by changing the input attributes, for a limited number of attributes:
        1. attack the model with i attributes
        2. backprop
        3. add the attribute with the largest gradient as the (i+1)-th attribute

        Parameters
        ----------
        attack: oneGNNAttack
        attacked_nodes: torch.Tensor - the victim nodes
        y_targets: torch.Tensor - the target labels of the attack
        malicious_nodes: torch.Tensor - the attacker/malicious nodes
        node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
        discrete_stop_after_1iter: bool - whether to stop the discrete attack after one iteration
                                          this is a specific flag for the GRAD_CHOICE approach

        Returns
        -------
        attack_results: torch.Tensor - 2d-tensor that includes
                                       1st-col - the defence
                                       2nd-col - the number of attributes used
        if the number of attributes is 0 the node is misclassified to begin with
    """
    # initialize
    model = attack.model_wrapper.model
    lr = attack.lr
    print_answer = attack.print_answer
    dataset = attack.getDataset()
    data = dataset.data
    num_attributes = data.x.shape[1]
    max_attributes_per_malicious = int(num_attributes * attack.l_0)
    max_attributes = max_attributes_per_malicious * malicious_nodes.shape[0]

    changed_attributes_all_malicious, epoch = 0, 0
    log_template = createLogTemplate(attack=attack, dataset=dataset)

    # changing the parameters which require grads and setting adversarial optimizer
    optimizer_params = setRequiresGrad(model=model,
                                       malicious_nodes=malicious_nodes)
    optimizer = torch.optim.Adam(params=optimizer_params, lr=lr)
    optimizer.zero_grad()

    # zero attributes
    with torch.no_grad():
        changed_attributes = 0
        for malicious_node in malicious_nodes:
            changed_attributes += model.node_attribute_list[malicious_node][
                0].sum().item()
            model.setNodesAttributes(idx_node=malicious_node,
                                     values=torch.zeros(num_attributes))

    # flip the attribute with the largest gradient
    model0 = copy.deepcopy(model)
    changed_attributes, prev_changed_attributes = 0, 0
    num_attributes_left = max_attributes_per_malicious * torch.ones_like(
        malicious_nodes).to(attack.device)
    while True:
        epoch += 1
        prev_model = copy.deepcopy(model)
        # train
        train(model=model,
              targeted=attack.targeted,
              attacked_nodes=attacked_nodes,
              y_targets=y_targets,
              optimizer=optimizer)
        num_attributes_left = flipUpBestNewAttributes(
            model=model,
            model0=prev_model,
            malicious_nodes=malicious_nodes,
            num_attributes_left=num_attributes_left)
        changed_attributes = max_attributes - num_attributes_left.sum().item()

        # test correctness
        test_discrete(model=model,
                      model0=model0,
                      malicious_nodes=malicious_nodes,
                      changed_attributes=changed_attributes,
                      max_attributes=max_attributes)

        # test
        results = test(data=data,
                       model=model,
                       targeted=attack.targeted,
                       attacked_nodes=attacked_nodes,
                       y_targets=y_targets)

        # prints
        if print_answer is not Print.NO and epoch != 1:
            print()
        if print_answer is Print.YES:
            print(log_template.format(node_num, epoch, changed_attributes,
                                      *results[:-1]),
                  flush=True,
                  end='')
        # breaks
        if (results[3] or changed_attributes == max_attributes
                or changed_attributes == prev_changed_attributes):
            break
        prev_changed_attributes = changed_attributes
        if discrete_stop_after_1iter:
            break

    if print_answer is Print.YES:
        print(', Attack Success: {}\n'.format(results[-1]), flush=True)
    if changed_attributes > max_attributes:
        return torch.tensor([[results[3], max_attributes]]).type(torch.long)
    else:
        return torch.tensor([[results[3],
                              changed_attributes]]).type(torch.long)
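# --- Illustrative sketch (not part of the repository) ---
# Step 3 of the docstring above ("add the attribute with the largest gradient as
# the (i+1)-th attribute") amounts to an argmax over the gradient of a malicious
# node's attribute row, restricted to attributes that are still zero. The real
# selection lives in flipUpBestNewAttributes (not shown here); this standalone
# toy version only illustrates that selection rule.
import torch

def flip_best_new_attribute(attributes, grad):
    candidate_grad = grad.clone()
    candidate_grad[attributes != 0] = float('-inf')  # only consider attributes that are still off
    best = torch.argmax(candidate_grad)
    flipped = attributes.clone()
    flipped[best] = 1.0
    return flipped, best.item()


attrs = torch.tensor([1., 0., 0., 0.])
grads = torch.tensor([0.9, 0.2, 0.7, -0.1])
print(flip_best_new_attribute(attrs, grads))  # flips index 2 (largest gradient among the zeros)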
def attackTrainerContinuous(attack, attacked_nodes: torch.Tensor,
                            y_targets: torch.Tensor,
                            malicious_nodes: torch.Tensor,
                            node_num: int) -> torch.Tensor:
    """
        a trainer function that attacks our model by changing the input attributes
        a successful attack is one that still fools the model after the attributes are embedded (within the l_inf constraint)

        Parameters
        ----------
        attack: oneGNNAttack
        attacked_nodes: torch.Tensor - the victim nodes
        y_targets: torch.Tensor - the target labels of the attack
        malicious_nodes: torch.Tensor - the attacker/malicious nodes
        node_num: int - the index of the attacked/victim node (out of the train/val/test-set)

        Returns
        -------
        attack_results: torch.Tensor - 2d-tensor that includes
                                       1st-col - the defence
                                       2nd-col - the number of attributes used
        if the number of attributes is 0 the node is misclassified to begin with
    """
    # initialize
    model = attack.model_wrapper.model
    attack_epochs = attack.attack_epochs
    lr = attack.lr
    print_answer = attack.print_answer
    dataset = attack.getDataset()
    data = dataset.data

    num_attributes = data.x.shape[1]
    max_attributes = num_attributes * malicious_nodes.shape[0]

    log_template = createLogTemplate(attack=attack, dataset=dataset)

    # changing the parameters which require grads and setting adversarial optimizer
    optimizer_params = setRequiresGrad(model=model,
                                       malicious_nodes=malicious_nodes)
    optimizer = torch.optim.Adam(params=optimizer_params, lr=lr)

    # find best_attributes
    model0 = copy.deepcopy(model)
    prev_changed_attributes = 0
    for epoch in range(0, attack_epochs):
        # train
        train(model=model,
              targeted=attack.targeted,
              attacked_nodes=attacked_nodes,
              y_targets=y_targets,
              optimizer=optimizer)

        # test correctness
        changed_attributes = (model.getInput() !=
                              model0.getInput())[malicious_nodes].sum().item()
        test_discrete(model=model,
                      model0=model0,
                      malicious_nodes=malicious_nodes,
                      changed_attributes=changed_attributes,
                      max_attributes=max_attributes)

        # test
        results = test(data=data,
                       model=model,
                       targeted=attack.targeted,
                       attacked_nodes=attacked_nodes,
                       y_targets=y_targets)

        # breaks
        if results[3]:
            # embed
            embeded_model = copy.deepcopy(model)
            for malicious_idx, malicious_node in enumerate(malicious_nodes):
                embedRowContinuous(model=embeded_model,
                                   malicious_node=malicious_node,
                                   model0=model0,
                                   l_inf=attack.l_inf)

            # test correctness
            changed_attributes = (
                embeded_model.getInput() !=
                model0.getInput())[malicious_nodes].sum().item()
            test_continuous(model=embeded_model,
                            model0=model0,
                            malicious_nodes=malicious_nodes,
                            changed_attributes=changed_attributes,
                            max_attributes=max_attributes,
                            l_inf=attack.l_inf)
            # test
            results = test(data=data,
                           model=embeded_model,
                           targeted=attack.targeted,
                           attacked_nodes=attacked_nodes,
                           y_targets=y_targets)
            if results[3]:
                if print_answer is Print.YES:
                    print(log_template.format(node_num, epoch + 1,
                                              *results[:-1]),
                          flush=True,
                          end='')
                break
        # prints
        if print_answer is Print.YES:
            print(log_template.format(node_num, epoch + 1, *results[:-1]),
                  flush=True,
                  end='')
        if changed_attributes == prev_changed_attributes:
            break
        prev_changed_attributes = changed_attributes

        if epoch != attack_epochs - 1 and print_answer is not Print.NO:
            print()

    if print_answer is Print.YES:
        print(', Attack Success: {}\n'.format(results[-1]), flush=True)
    if not results[3]:
        changed_attributes = max_attributes
    return torch.tensor([[results[3], changed_attributes]]).type(torch.long)
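# --- Illustrative sketch (not part of the repository) ---
# embedRowContinuous is defined elsewhere in the repository, so its exact
# behaviour is not shown here. Since test_continuous receives an l_inf budget,
# the embedding plausibly projects the perturbed attribute row back into an
# l_inf ball around the clean row; the clamp below is only an assumed
# illustration of such a projection, not the repository's implementation.
import torch

def project_l_inf(perturbed_row, clean_row, l_inf):
    low, high = clean_row - l_inf, clean_row + l_inf
    return torch.max(torch.min(perturbed_row, high), low)


clean = torch.tensor([0.2, 0.5, 0.8])
attacked = torch.tensor([0.9, 0.4, -0.3])
print(project_l_inf(attacked, clean, l_inf=0.1))  # tensor([0.3000, 0.4000, 0.7000])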
def edgeAttackVictim(attack, approach: Approach, print_flag: bool, attacked_node: torch.Tensor, y_target: torch.Tensor,
                     node_num: int) -> torch.Tensor:
    """
        chooses the edge we attack with from our pool of possible edges
        the pool of possible edges changes per approach
        the BFS environment is also calculated according to our selected approach
        lastly, we attack using flipEdge (RANDOM approach) or edgeTrainer (all other approaches)
        important note: the victim node is already known (attacked node)

        Parameters
        ----------
        attack: oneGNNAttack
        approach: Approach
        print_flag: bool - whether to print every iteration or not
        attacked_node: torch.Tensor - the victim node
        y_target: torch.Tensor - the target label of the attack
        node_num: int - the index of the attacked/victim node (out of the train/val/test-set)

        Returns
        -------
        attack_result: torch.Tensor
    """
    device = attack.device
    dataset = attack.getDataset()
    data = dataset.data
    model = attack.model_wrapper.model
    targeted = attack.targeted
    end_log_template = ', Attack Success: {}'

    neighbours_and_dist = kBFS(root=attacked_node, device=device, reversed_arr_list=dataset.reversed_arr_list,
                               K=model.num_layers - 1)
    if not neighbours_and_dist.nelement():
        if print_flag:
            print('Attack: {:03d}, Node: {} is a solo node'.format(node_num, attacked_node.item()), flush=True)
        return None
    malicious_indices = neighbours_and_dist[:, 0]
    if print_flag:
        print('Attack: {:03d}, Node: {}'.format(node_num, attacked_node.item()), flush=True, end='')

    # according to our approach choose the edge we wish to flip
    if approach is EdgeApproach.RANDOM:
        # select a random node on the graph - the malicious index
        # select a random node from our BFS of distance K-1 (or the victim itself) - the attacked node
        # use flipEdge
        malicious_index = np.random.choice(data.num_nodes, 1).item()
        new_attacked_node_index = np.random.choice(malicious_indices.shape[0] + 1, 1).item()
        if new_attacked_node_index == malicious_indices.shape[0]:
            new_attacked_node = attacked_node
        else:
            new_attacked_node = torch.tensor([malicious_indices[new_attacked_node_index].item()]).to(device)
        flipEdge(model=model, attacked_node=new_attacked_node, malicious_index=malicious_index, device=device)
        attack_results = test(data=data, model=model, targeted=targeted, attacked_nodes=new_attacked_node,
                              y_targets=y_target)

        if print_flag:
            print(end_log_template.format(attack_results[3]), flush=True)
    else:
        # EdgeApproach.SINGLE
        # select a random node on the graph - malicious index
        # Add all possible edges between the malicious index and the BFS of distance K-1
        # calculate the edge with the largest gradient and flip it, using edgeTrainer
        #
        # EdgeApproach.GRAD_CHOICE
        # Add all possible edges between all possible nodes and the BFS of distance K-1
        # calculate the edge with the largest gradient and flip it, using edgeTrainer
        malicious_index = model.expandEdgesByMalicious(dataset=dataset, approach=approach, attacked_node=attacked_node,
                                                       neighbours=malicious_indices, device=device)
        attack_results = edgeTrainer(data=data, approach=approach, targeted=targeted, model=model,
                                     attacked_node=attacked_node, y_target=y_target, node_num=node_num,
                                     malicious_index=malicious_index, device=device, print_flag=print_flag,
                                     end_log_template=end_log_template)
    if attack_results is None:
        print("Node approach doesnt exist", flush=True)
        quit()

    return attack_results[3]
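# --- Illustrative sketch (not part of the repository) ---
# The EdgeApproach.RANDOM branch above draws two endpoints: the malicious index
# uniformly over the whole graph, and the other endpoint uniformly over the
# victim's BFS neighbourhood plus the victim itself (that is what the "+ 1"
# encodes). A toy replay of just that sampling step.
import numpy as np
import torch

def sample_random_edge(num_nodes, attacked_node, malicious_indices):
    malicious_index = np.random.choice(num_nodes, 1).item()
    # the extra slot (+ 1) lets the victim node itself be drawn as the endpoint
    pick = np.random.choice(malicious_indices.shape[0] + 1, 1).item()
    if pick == malicious_indices.shape[0]:
        endpoint = attacked_node
    else:
        endpoint = torch.tensor([malicious_indices[pick].item()])
    return malicious_index, endpoint


print(sample_random_edge(10, torch.tensor([0]), torch.tensor([2, 5, 7])))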
def edgeTrainer(data, approach: Approach, targeted: bool, model,
                attacked_node: torch.Tensor, y_target: torch.Tensor, malicious_index: torch.Tensor, node_num: int,
                device, print_flag, end_log_template):
    """
        a forward pass function which chooses the edge with the largest gradient in edge_weight and flips it
        for multi approaches this process is repeated for each edge with a non-zero gradient

        Parameters
        ----------
        data: torch_geometric.data.Data.data
        approach: Approach
        targeted: bool
        model: Model
        attacked_node: torch.Tensor - the victim node
        y_target: torch.Tensor - the target label of the attack
        malicious_index: torch.Tensor - the attacker/malicious index
        node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
        device: torch.device
        print_flag: bool - whether to print every iteration or not
        end_log_template: str - suffix of the log format

        Returns
        -------
        attack_result: torch.Tensor
    """
    log_template = '\nAttack: {:03d}, #Edge: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'

    edge_weight0 = model.edge_weight.clone().detach()
    optimizer_params = setRequiresGrad(model)
    optimizer = torch.optim.SGD(optimizer_params, lr=0.01)

    train(model=model, targeted=targeted, attacked_nodes=attacked_node, y_targets=y_target, optimizer=optimizer)

    with torch.no_grad():
        diff = model.edge_weight - edge_weight0
        mask1 = torch.logical_and(edge_weight0 == 1, diff > 0).to(device)
        mask2 = torch.logical_and(edge_weight0 == 0, diff < 0).to(device)
        mask = torch.logical_or(mask1, mask2).to(device)
        diff[mask] = 0
        abs_diff = torch.abs(diff)

        # when the approach is not global, the attacker (malicious index) is already chosen
        if not approach.isGlobal():
            malicious_mask = model.edge_index[0] != torch.tensor(malicious_index).to(device)
            abs_diff[malicious_mask] = 0

        # pick the edge with the largest gradient magnitude
        max_malicious_edge = torch.argmax(abs_diff).to(device)

        # when approach is globalGrad you can choose the attacker
        if approach.isGlobal():
            malicious_index = model.edge_index[0][max_malicious_edge]
            malicious_node_mask = model.edge_index[0] != malicious_index
            abs_diff[malicious_node_mask] = 0

        # restore edge weights to their original values and flip the chosen edge
        model.edge_weight.data = edge_weight0
        model.edge_weight.data[max_malicious_edge] = not model.edge_weight.data[max_malicious_edge]
        attack_results = test(data=data, model=model, targeted=targeted, attacked_nodes=attacked_node,
                              y_targets=y_target)
        if not approach.isMulti():
            if print_flag:
                print(end_log_template.format(attack_results[-1]), flush=True)
        else:
            malicious_node_abs_diff = abs_diff[abs_diff != 0]
            # sort edges by absolute diff
            _, sorted_malicious_edge = torch.sort(malicious_node_abs_diff, descending=True)
            if print_flag:
                print(', #Edges: {}'.format(sorted_malicious_edge.shape[0]), flush=True, end='')
                print(log_template.format(node_num, 1, *attack_results[:-1]), flush=True, end='')

            if not attack_results[3] and sorted_malicious_edge.shape[0] > 1:
                attack_results = \
                    findMinimalEdges(sorted_edges=sorted_malicious_edge[1:], data=data, model=model,
                                     targeted=targeted, attacked_node=attacked_node, y_target=y_target,
                                     node_num=node_num, print_flag=print_flag, log_template=log_template,
                                     end_log_template=end_log_template)
            elif print_flag:
                print(end_log_template.format(attack_results[-1]) + '\n', flush=True)
    return attack_results
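# --- Illustrative sketch (not part of the repository) ---
# The masking logic in the torch.no_grad() block above is the core of the edge
# attack: after one gradient step on edge_weight, moves that cannot be realised
# by a flip (an existing edge whose weight grew, or a missing edge whose weight
# shrank) are zeroed out, and the largest surviving move names the edge to flip.
# A standalone replay of that selection on toy tensors.
import torch

def pick_edge_to_flip(edge_weight0, edge_weight_after_step):
    diff = edge_weight_after_step - edge_weight0
    invalid = torch.logical_or(
        torch.logical_and(edge_weight0 == 1, diff > 0),
        torch.logical_and(edge_weight0 == 0, diff < 0),
    )
    diff[invalid] = 0  # discard moves that go against a legal flip
    return torch.argmax(torch.abs(diff)).item()


before = torch.tensor([1., 1., 0., 0.])
after = torch.tensor([1.3, 0.6, 0.2, -0.5])  # pretend result of one gradient step
print(pick_edge_to_flip(before, after))       # edge 1: existing edge whose weight dropped the most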
# Example #8
def attackVictim(attack, approach: Approach, attacked_node: torch.Tensor, y_target: torch.Tensor, node_num: int)\
        -> torch.Tensor:
    """
        chooses the node we attack with (the malicious node) from our BFS environment
        the BFS environment is also calculated according to our selected approach
        lastly, we attack using attackTrainer
        important note: the victim node is already known (attacked node)

        Parameters
        ----------
        attack: oneGNNAttack
        approach: Approach
        attacked_node: torch.Tensor - the victim node
        y_target: torch.Tensor - the target label of the attack
        node_num: int - the index of the attacked/victim node (out of the train/val/test-set)

        Returns
        -------
        attack_results: torch.Tensor - 2d-tensor that includes
                                       1st-col - the defence
                                       2nd-col - the number of attributes used
        if the number of attributes is 0 the node is misclassified to begin with
    """
    device = attack.device
    dataset = attack.getDataset()
    print_answer = attack.print_answer

    neighbours_and_dist = kBFS(root=attacked_node, device=device, reversed_arr_list=dataset.reversed_arr_list,
                               K=attack.num_layers)
    if neighbours_and_dist.nelement():
        neighbours_and_dist = manipulateNeighborhood(attack=attack, approach=approach, attacked_node=attacked_node,
                                                     neighbours_and_dist=neighbours_and_dist, device=device)
        attack_log = 'Attack: {:03d}, Node: {}, BFS clique: {}'.format(node_num, attacked_node.item(),
                                                                       neighbours_and_dist.shape[0] + 1)
    else:
        attack_log = 'Attack: {:03d}, Node: {} is a solo node'.format(node_num, attacked_node.item())
    # in adversarial mode add #Epoch
    if attack.mode.isAdversarial():
        attack_log = 'Adv Epoch: {:03d}, '.format(attack.idx) + attack_log

    # special cases of solo node and duo node for double
    BFS_size = neighbours_and_dist.shape[0]
    if not neighbours_and_dist.nelement():
        if print_answer is Print.YES:
            print(attack_log, flush=True)
        return None

    if print_answer is Print.YES:
        print(attack_log, end='', flush=True)
        if approach is not NodeApproach.MULTIPLE_ATTACKERS:
            print()
    malicious_node, attack = approach.getMaliciousNode(attack=attack, attacked_node=attacked_node, y_target=y_target,
                                                       node_num=node_num, neighbours_and_dist=neighbours_and_dist,
                                                       BFS_size=BFS_size)
    # handle the irregular approaches (AGREE, ZERO_FEATURES, MULTIPLE_ATTACKERS, INJECTION) separately
    if approach is NodeApproach.AGREE:
        print()
        malicious_node_heuristic = heuristicApproach(reversed_arr_list=dataset.reversed_arr_list,
                                                     neighbours_and_dist=neighbours_and_dist,
                                                     device=attack.device)
        malicious_node_gradient = gradientApproach(attack=attack, attacked_node=attacked_node, y_target=y_target,
                                                   node_num=node_num, neighbours_and_dist=neighbours_and_dist)
        attack_results = torch.zeros(1, 2)
        attack_results[0][0] = malicious_node_heuristic == malicious_node_gradient  # in attackSet we change to equal
        return attack_results

    if approach is NodeApproach.ZERO_FEATURES:
        model = attack.model_wrapper.model
        data = dataset.data
        zero_model = copy.deepcopy(model)
        # train
        zero_model.node_attribute_list[malicious_node][:] = 0

        # test correctness
        changed_attributes = (zero_model.getInput() != model.getInput())[malicious_node].sum().item()

        # test
        results = test(data=data, model=zero_model, targeted=attack.targeted,
                       attacked_nodes=attacked_node, y_targets=y_target)

        log_template = createLogTemplate(attack=attack, dataset=dataset) + ', Attack Success: {}\n'
        if dataset.type is DatasetType.DISCRETE:
            print(log_template.format(node_num, 1, changed_attributes, *results), flush=True)
        if dataset.type is DatasetType.CONTINUOUS:
            print(log_template.format(node_num, 1, *results), flush=True)
        attack_results = torch.tensor([[results[3], changed_attributes]])
        return attack_results

    if approach is NodeApproach.MULTIPLE_ATTACKERS:
        if malicious_node is None:
            if print_answer is Print.YES:
                print(f': Too small for {attack.num_of_attackers} attackers\n', flush=True)
            return None
        else:
            print()

    if approach is NodeApproach.INJECTION:
        dataset = attack.getDataset()
        classified_to_target = checkNodeClassification(attack=attack, dataset=dataset,
                                                       attacked_node=attacked_node, y_target=y_target,
                                                       print_answer=Print.NO, attack_num=node_num + 1)
        if not classified_to_target:
            print("misclassified right after injection!\n", flush=True)
            attack.model_wrapper.model.removeInjectedNode(attack=attack)
            return torch.tensor([[1, 0]])

    attack_results = attackTrainer(attack=attack, attacked_nodes=attacked_node, y_targets=y_target,
                                   malicious_nodes=malicious_node, node_num=node_num)

    if approach is NodeApproach.INJECTION:
        attack.model_wrapper.model.removeInjectedNode(attack=attack)
    return attack_results
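# --- Illustrative sketch (not part of the repository) ---
# kBFS is defined elsewhere; from its use in attackVictim and edgeAttackVictim
# it collects the victim's neighbourhood up to K hops (the receptive field of a
# K-layer GNN) and returns (node, distance) pairs, with an empty result for a
# solo node. A toy BFS with that assumed contract, over a plain adjacency dict.
import torch
from collections import deque

def k_hop_bfs(root, adjacency, K):
    seen, out, queue = {root}, [], deque([(root, 0)])
    while queue:
        node, dist = queue.popleft()
        if dist == K:
            continue
        for neighbour in adjacency[node]:
            if neighbour not in seen:
                seen.add(neighbour)
                out.append((neighbour, dist + 1))
                queue.append((neighbour, dist + 1))
    return torch.tensor(out) if out else torch.empty(0, 2, dtype=torch.long)


adjacency = {0: [1, 2], 1: [0, 3], 2: [0], 3: [1]}
print(k_hop_bfs(0, adjacency, K=2))  # rows: (1, 1), (2, 1), (3, 2)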