Example #1
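All seven snippets appear to come from the same training module; a minimal preamble they assume is sketched below. `record_time`, `get_grads`, `Params`, and `Model` are project-local helpers and types rather than standard PyTorch names.

import time

import torch
import torch.nn.functional as F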
def compute_backdoor_loss(params,
                          model,
                          criterion,
                          inputs_back,
                          labels_back,
                          grads=None):
    t = time.perf_counter()
    outputs = model(inputs_back)
    record_time(params, t, 'forward')

    if params.task == 'pipa':
        # PIPA: rescale per-sample losses, down-weighting label-0 samples,
        # and zero out the loss for batches whose labels are all zero.
        loss = criterion(outputs, labels_back)
        loss[labels_back == 0] *= 0.001
        if labels_back.sum().item() == 0.0:
            loss[:] = 0.0
        loss = loss.mean()
    else:
        loss = criterion(outputs, labels_back)
    if not params.dp:
        # Without differential privacy the per-sample losses can be reduced
        # here; DP training keeps them unreduced for per-sample clipping.
        loss = loss.mean()

    if grads:
        grads = get_grads(params, model, loss)

    return loss, grads
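A minimal usage sketch, with hypothetical ToyParams and no-op record_time stubs (get_grads from Example #2 must also be in scope). The key requirement is a criterion built with reduction='none', since the 'pipa' branch rescales individual per-sample losses.

import torch
import torch.nn as nn

def record_time(params, t, name):  # hypothetical no-op profiler stub
    pass

class ToyParams:  # hypothetical stand-in for the project's Params
    task = 'pipa'
    dp = False

model = nn.Linear(10, 2)
criterion = nn.CrossEntropyLoss(reduction='none')  # keep per-sample losses
inputs_back = torch.randn(4, 10)
labels_back = torch.tensor([0, 1, 1, 0])
loss, grads = compute_backdoor_loss(ToyParams(), model, criterion,
                                    inputs_back, labels_back, grads=True)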
Example #2
def get_grads(params, model, loss):
    t = time.perf_counter()
    # torch.autograd.grad returns gradients in parameter order and, unlike
    # backward(), does not populate p.grad.
    grads = list(
        torch.autograd.grad(loss.mean(),
                            [x for x in model.parameters() if x.requires_grad],
                            retain_graph=True))
    record_time(params, t, 'backward')

    return grads
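Because torch.autograd.grad leaves p.grad untouched, callers pair the returned list with the trainable parameters in the same order. One way to consume it, assuming a model and a grads list from get_grads are in scope (a sketch of a plain SGD step, not the project's aggregation logic):

with torch.no_grad():
    trainable = [p for p in model.parameters() if p.requires_grad]
    for p, g in zip(trainable, grads):
        p -= 0.1 * g  # hypothetical learning rate of 0.1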
Example #3
def compute_latent_cosine_similarity(params: Params,
                                     model: Model,
                                     fixed_model: Model,
                                     inputs,
                                     grads=None):
    if not fixed_model:
        return torch.tensor(0.0), None
    t = time.perf_counter()
    with torch.no_grad():
        _, fixed_latent = fixed_model(inputs, latent=True)
    _, latent = model(inputs, latent=True)
    record_time(params, t, 'forward')

    loss = -torch.cosine_similarity(latent, fixed_latent).mean() + 1
    if grads:
        grads = get_grads(params, model, loss)

    return loss, grads
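Examples #3 and #6 assume the model's forward accepts a latent flag and returns an (outputs, latent) pair; the loss here is 1 minus the mean cosine similarity, which is zero when the two latents point in the same direction. A hypothetical ToyNet sketching that interface:

import torch
import torch.nn as nn

class ToyNet(nn.Module):
    """Hypothetical model exposing the (outputs, latent) interface used above."""

    def __init__(self):
        super().__init__()
        self.backbone = nn.Linear(10, 8)
        self.head = nn.Linear(8, 2)

    def forward(self, x, latent=False):
        z = torch.relu(self.backbone(x))
        # This toy always returns the pair; the real Model presumably gates
        # the second value on the latent flag.
        return self.head(z), z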
Example #4
def compute_normal_loss(params, model, criterion, inputs, labels, grads):
    t = time.perf_counter()
    outputs = model(inputs)
    record_time(params, t, 'forward')
    loss = criterion(outputs, labels)

    if not params.dp:
        loss = loss.mean()

    if grads:
        grads = get_grads(params, model, loss)

    return loss, grads
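With differential privacy the per-sample losses are left unreduced so a DP engine can clip per-sample gradients before aggregation. Reusing the hypothetical ToyParams, record_time, model, and criterion stubs from the Example #1 sketch, but with dp=True:

params = ToyParams()
params.task = 'normal'
params.dp = True  # keep per-sample losses for DP-style clipping

loss, _ = compute_normal_loss(params, model, criterion,
                              torch.randn(4, 10), torch.tensor([0, 1, 1, 0]),
                              grads=None)
print(loss.shape)  # torch.Size([4]): unreduced because params.dp is True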
Example #5
def get_latent_grads(params, model, inputs, labels):
    model.eval()
    model.zero_grad()
    t = time.perf_counter()
    pred, _ = model(inputs)
    record_time(params, t, 'forward')
    # One-hot mask selecting each sample's target-class logit.
    z = torch.zeros_like(pred)
    z[list(range(labels.shape[0])), labels] = 1
    pred = pred * z
    t = time.perf_counter()
    pred.sum().backward(retain_graph=True)
    record_time(params, t, 'backward')

    # Grad-CAM channel weights: keep only the samples carrying the backdoor
    # label, then average feature-map gradients over batch and spatial dims.
    gradients = model.get_gradient()[labels == params.backdoor_label]
    pooled_gradients = torch.mean(gradients, dim=[0, 2, 3]).detach()
    model.zero_grad()

    return pooled_gradients
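model.get_gradient() is project-local. One common way to provide it is a tensor hook on the last convolutional feature map; a minimal sketch under that assumption (GradCamNet is hypothetical):

import torch
import torch.nn as nn

class GradCamNet(nn.Module):
    """Hypothetical model exposing get_gradient() via a tensor hook."""

    def __init__(self):
        super().__init__()
        self.features = nn.Conv2d(3, 512, 3, padding=1)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512, 10)
        self._gradients = None

    def forward(self, x, latent=False):
        fmap = self.features(x)
        # Stash the gradient flowing back into the feature map.
        fmap.register_hook(lambda g: setattr(self, '_gradients', g))
        pred = self.fc(self.pool(fmap).flatten(1))
        return pred, fmap

    def get_gradient(self):
        # (N, 512, H, W); populated after backward().
        return self._gradients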
Example #6
def compute_spectral_evasion_loss(params: Params,
                                  model: Model,
                                  fixed_model: Model,
                                  inputs,
                                  grads=None):
    """
    Evades spectral analysis defense. Aims to preserve the latent representation
    on non-backdoored inputs. Uses a checkpoint non-backdoored `fixed_model` to
    compare the outputs. Uses euclidean distance as penalty.


    :param params: training parameters
    :param model: current model
    :param fixed_model: saved non-backdoored model as a reference.
    :param inputs: training data inputs
    :param grads: compute gradients.

    :return:
    """

    if not fixed_model:
        return torch.tensor(0.0), None
    t = time.perf_counter()
    with torch.no_grad():
        _, fixed_latent = fixed_model(inputs, latent=True)
    _, latent = model(inputs, latent=True)
    record_time(params, t, 'latent_fixed')
    if params.spectral_similarity == 'norm':
        loss = torch.norm(latent - fixed_latent, dim=1).mean()
    elif params.spectral_similarity == 'cosine':
        loss = -torch.cosine_similarity(latent, fixed_latent).mean() + 1
    else:
        raise ValueError('Specify a correct similarity metric for '
                         'spectral evasion: [norm, cosine].')
    if grads:
        grads = get_grads(params, model, loss)

    return loss, grads
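Both branches are penalties that vanish at the reference: 'norm' is zero only when the latents are identical, while 'cosine' is zero whenever they point in the same direction (its range is [0, 2]). A quick numerical check:

import torch

latent = torch.randn(8, 64)
fixed_latent = latent.clone()
print(torch.norm(latent - fixed_latent, dim=1).mean())            # tensor(0.)
print(-torch.cosine_similarity(latent, fixed_latent).mean() + 1)  # ~ tensor(0.)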
Example #7
def compute_sentinet_evasion(params,
                             model,
                             inputs,
                             inputs_back,
                             labels_back,
                             grads=None):
    """The GradCam design is taken from:
    https://medium.com/@stepanulyanin/implementing-grad-cam-in-pytorch-ea0937c31e82
    
    :param params: 
    :param model: 
    :param inputs: 
    :param inputs_back: 
    :param labels_back: 
    :param grads: 
    :return: 
    """
    # Grad-CAM channel weights for the clean batch.
    pooled = get_latent_grads(params, model, inputs, labels_back)
    t = time.perf_counter()
    features = model.features(inputs)
    record_time(params, t, 'forward')
    # Weight the feature maps channel-wise (assumes a 512-channel final block).
    features = features * pooled.view(1, 512, 1, 1)

    pooled_back = get_latent_grads(params, model, inputs_back, labels_back)
    t = time.perf_counter()
    back_features = model.features(inputs_back)
    record_time(params, t, 'forward')
    back_features = back_features * pooled_back.view(1, 512, 1, 1)

    # Normalized saliency heatmaps, averaged over batch and channel dims.
    features = torch.mean(features, dim=[0, 1], keepdim=True)
    features = F.relu(features) / features.max()

    back_features = torch.mean(back_features, dim=[0, 1], keepdim=True)
    back_features = F.relu(back_features) / back_features.max()
    # Penalize spots where the backdoored heatmap is hotter than the clean one.
    loss = F.relu(back_features - features).max() * 10
    if grads:
        t = time.perf_counter()
        loss.backward(retain_graph=True)
        record_time(params, t, 'backward')
        grads = params.copy_grad(model)

    return loss, grads
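The final loss is the largest spatial excess of the backdoored heatmap over the clean one, scaled by 10; it vanishes when the backdoored inputs are nowhere more salient than the clean ones. A small worked example on two 2x2 heatmaps:

import torch
import torch.nn.functional as F

clean_map = torch.tensor([[0.2, 0.9],
                          [0.1, 0.4]])
back_map = torch.tensor([[0.8, 0.5],
                         [0.1, 0.4]])
loss = F.relu(back_map - clean_map).max() * 10
print(loss)  # ~ 6.0: the 0.8 vs 0.2 hotspot, scaled by 10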