Example #1
def generate_word_adv(model, args, numclass, data, device, index2word, word_index):
    """Craft word-level adversarial examples for one batch: score every token,
    then replace up to args.power of the highest-scoring words."""
    inputs, target, idx, raw = data
    inputs, target = inputs.to(device), target.to(device)
    h, output = model(inputs)
    pred = torch.max(output, 1)[1].view(target.size())

    losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)

    _, indices = torch.sort(losses, dim=1, descending=True)

    advinputs = inputs.clone()


    for k in range(inputs.size()[0]):
        j = 0  # substitutions made so far
        t = 0  # rank position in the score ordering
        while j < args.power and t < inputs.size()[1]:
            # Token ids 0-3 look reserved for padding/special tokens; skip them.
            if advinputs[k, indices[k][t]].item() > 3:
                word, advinputs[k, indices[k][t]] = transformer.transform(args.transformer)(
                    advinputs[k, indices[k][t]].item(), word_index, index2word, top_words=args.dictionarysize)
                j += 1
            t += 1
    return advinputs
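The scorefunc call above dispatches to the repo's scoring module, which is not shown on this page. As a rough, self-contained sketch of what a 'replaceone'-style scorer (the default named in Example #2) might compute, one can rank each position by how much the predicted-class confidence drops when that token is blanked out. The function below is an assumption for illustration, not the repo's implementation; it assumes a model that returns logits directly, and unk_index=2 is a guess at the out-of-vocabulary id.

import torch

def replaceone_score_sketch(model, inputs, pred, numclass, unk_index=2):
    # Hypothetical leave-one-out scorer. inputs: (batch, seq_len) LongTensor
    # of token ids; pred: (batch,) predicted labels. numclass is kept only to
    # mirror the scorefunc signature used above.
    with torch.no_grad():
        base = torch.softmax(model(inputs), dim=1)
        base_conf = base.gather(1, pred.view(-1, 1)).squeeze(1)
        losses = torch.zeros(inputs.size(0), inputs.size(1), device=inputs.device)
        for i in range(inputs.size(1)):
            perturbed = inputs.clone()
            perturbed[:, i] = unk_index  # blank out one position
            conf = torch.softmax(model(perturbed), dim=1).gather(
                1, pred.view(-1, 1)).squeeze(1)
            losses[:, i] = base_conf - conf  # bigger drop = more important word
    return losses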
Example #2
def visualize(input_str,
              dict_word=[],
              index2word=[],
              classes_list=[],
              power=5,
              scoring_alg='replaceone',
              transformer_alg='homoglyph',
              model=model,
              mode='word',
              maxlength=500,
              device=None,
              filter_char=default_filter,
              alphabet=default_alphabet):
    """Attack a single string and print the original and adversarial
    predictions. mode='word' substitutes whole words; mode='char' substitutes
    individual characters."""
    numclass = len(classes_list)

    if mode == 'word':

        input_seq = simple_tokenize(input_str, dict_word)
        input_seq = torch.Tensor(input_seq).long().view(1, -1)
        if device:
            input_seq = input_seq.to(device)
        res1 = model(input_seq)
        pred1 = torch.max(res1, 1)[1].view(-1)
        losses = scoring.scorefunc(scoring_alg)(model, input_seq, pred1,
                                                numclass)

        print(input_str)
        pred1 = pred1.item()
        print('original:', classes_list[pred1])

        _, indices = torch.sort(losses, dim=1, descending=True)

        advinputs = input_seq.clone()
        wtmp = []
        # Human-readable tokens, used later to rebuild the adversarial string.
        for i in range(input_seq.size()[1]):
            if advinputs[0, i].item() > 3:
                wtmp.append(index2word[advinputs[0, i].item()])
            else:
                wtmp.append('')
        j = 0
        t = 0
        while j < power and t < input_seq.size()[1]:
            # Token ids 0-3 look reserved for padding/special tokens; skip them.
            if advinputs[0, indices[0][t]].item() > 3:
                word, advinputs[0, indices[0][t]] = transformer.transform(
                    transformer_alg)(advinputs[0, indices[0][t]].item(),
                                     dict_word,
                                     index2word,
                                     top_words=20000)
                wtmp[indices[0][t]] = word
                j += 1
            t += 1
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1).item()
        adv_str = recoveradv(input_str.lower(), index2word, input_seq[0], wtmp)
        print(adv_str)
        print('adversarial:', classes_list[pred2])
        return (input_str, torch.exp(res1).detach().cpu()[0],
                classes_list[pred1], adv_str,
                torch.exp(output2).detach().cpu()[0], classes_list[pred2])
    elif mode == 'char':
        inputs = transchar(input_str, alphabet=alphabet)
        if device:
            inputs = inputs.to(device)
        output = model(inputs)
        pred1 = torch.max(output, 1)[1].view(-1)

        losses = scoring_char.scorefunc(scoring_alg)(model, inputs, pred1,
                                                     numclass)

        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        dt = inputs.sum(dim=1).int()  # positions that hold an actual character
        j = 0
        t = 0
        # The char pipeline appears to index the text in reverse order, so edit
        # a reversed copy of the string and flip it back afterwards.
        md = input_str.lower()[::-1]
        while j < power and t < inputs.size()[2]:
            if dt[0, indices[0][t]].item() > 0:
                pos = indices[0][t].item()
                cur_char = torch.max(advinputs[0, :, pos], 0)[1].item()
                advinputs[0, :, pos], nowchar = transformer_char.transform(
                    transformer_alg)(inputs, cur_char, alphabet)
                md = md[:pos] + nowchar + md[pos + 1:]
                j += 1
            t += 1
        md = md[::-1]
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1)
        print(input_str)
        print('original:', classes_list[pred1.item()])
        print(md)
        print('adversarial:', classes_list[pred2.item()])
        return (input_str, torch.exp(output)[0].detach().cpu(),
                classes_list[pred1.item()], md,
                torch.exp(output2)[0].detach().cpu(),
                classes_list[pred2.item()])
    else:
        raise ValueError('Wrong mode %s' % mode)
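transformer.transform('homoglyph') is likewise defined outside this page. A minimal sketch of the assumed idea: swap one character of the chosen word for a visually similar Unicode lookalike, so the edited word falls out of the vocabulary and maps to the unknown-word id. The mapping table and unk_index below are illustrative guesses, not the repo's tables.

# Latin letters mapped to Cyrillic lookalikes (illustrative subset).
HOMOGLYPHS = {'a': '\u0430', 'c': '\u0441', 'e': '\u0435', 'i': '\u0456',
              'o': '\u043e', 'p': '\u0440', 's': '\u0455'}

def homoglyph_transform_sketch(word_id, word_index, index2word, top_words=20000,
                               unk_index=2):
    word = index2word[word_id]
    for pos, ch in enumerate(word):
        if ch in HOMOGLYPHS:
            new_word = word[:pos] + HOMOGLYPHS[ch] + word[pos + 1:]
            # A lookalike spelling is almost never in the vocabulary, so the
            # model now sees UNK where the original word was.
            return new_word, word_index.get(new_word, unk_index)
    return word, word_id  # no substitutable character found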
Example #3
def attackword(model, args, numclass, test_loader, device, index2word, word_index, maxbatch=None):
    """Run the word-level attack over a test loader, report accuracy under
    attack, append a result line to the log, and save the adversarial samples."""
    corrects = 0.0
    model.eval()
    wordinput = []
    tgt = []
    adv = []
    origsample = []
    origsampleidx = []

    for dataid, data in enumerate(test_loader):
        print(dataid)
        if maxbatch is not None and dataid >= maxbatch:
            break
        inputs, target, idx, raw = data
        inputs, target = inputs.to(device), target.to(device)
        origsample.append(inputs)
        origsampleidx.append(idx)
        tgt.append(target)
        wtmp = []
        output = model(inputs)
        pred = torch.max(output, 1)[1].view(target.size())

        losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)

        _, indices = torch.sort(losses, dim=1, descending=True)

        advinputs = inputs.clone()

        # Keep a human-readable copy of each sample's tokens for recovery later.
        for k in range(inputs.size()[0]):
            wtmp.append([])
            for i in range(inputs.size()[1]):
                if advinputs[k, i].item() > 3:
                    wtmp[-1].append(index2word[advinputs[k, i].item()])
                else:
                    wtmp[-1].append('')
        for k in range(inputs.size()[0]):
            j = 0
            t = 0
            while j < args.power and t < inputs.size()[1]:
                # Token ids 0-3 look reserved for padding/special tokens; skip them.
                if advinputs[k, indices[k][t]].item() > 3:
                    word, advinputs[k, indices[k][t]] = transformer.transform(args.transformer)(
                        advinputs[k, indices[k][t]].item(), word_index, index2word, top_words=args.dictionarysize)
                    wtmp[k][indices[k][t]] = word
                    print(word)
                    j += 1
                t += 1
        adv.append(advinputs)

        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size())
        corrects += (pred2 == target).sum().item()
        for i in range(len(wtmp)):
            print(raw[i])
            print(pred[i].item())
            wordinputi = recoveradv(raw[i], index2word, inputs[i], wtmp[i])
            print(wordinputi)
            wordinput.append(wordinputi)
            print(pred2[i].item())

    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    origsampleidx = torch.cat(origsampleidx)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % acc)
    with open('attack_log.txt', 'a') as f:
        f.write('%d\t%d\t%s\t%s\t%s\t%d\t%.2f\n' % (
            args.data, args.wordlength, args.model, args.scoring, args.transformer, args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples/%s_%d_%s_%s_%d_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power, args.wordlength)
    else:
        advsamplepath = args.advsamplepath
    torch.save({'original': origsamples, 'sampleid': origsampleidx, 'wordinput': wordinput, 'advinputs': advinputs,
                'labels': target}, advsamplepath)
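A hypothetical invocation of Example #3, assuming the model, loaders, and vocabulary have already been built elsewhere in the repo. Every option value below is illustrative rather than a documented default; only the field names are taken from the function body above.

from argparse import Namespace

# Illustrative settings; 'simplernn' is a placeholder model name.
args = Namespace(scoring='replaceone', transformer='homoglyph', power=5,
                 dictionarysize=20000, data=0, wordlength=500,
                 model='simplernn', advsamplepath=None)
attackword(model, args, numclass, test_loader, device,
           index2word, word_index, maxbatch=10)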
Example #4
def attackword(maxbatch=None):
    """Variant of the word-level attack that reads the model, loader, and
    options from module-level globals instead of taking explicit arguments."""
    corrects = 0.0
    model.eval()
    wordinput = []
    tgt = []
    adv = []
    origsample = []
    flagstore = True
    for dataid, data in enumerate(test_loader):
        print(dataid)
        if maxbatch is not None and dataid >= maxbatch:
            break
        inputs, target = data
        inputs, target = inputs.cuda(), target.cuda()
        origsample.append(inputs)
        tgt.append(target)
        wtmp = []
        output = model(inputs)
        pred = torch.max(output, 1)[1].view(target.size())

        losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)

        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()

        if flagstore:
            # Keep a human-readable copy of each sample's tokens.
            for k in range(inputs.size()[0]):
                wtmp.append([])
                for i in range(inputs.size()[1]):
                    if advinputs[k, i].item() > 3:
                        wtmp[-1].append(index2word[advinputs[k, i].item()])
                    else:
                        wtmp[-1].append('')
            for k in range(inputs.size()[0]):
                j = 0
                t = 0
                while j < args.power and t < inputs.size()[1]:
                    if advinputs[k, indices[k][t]].item() > 3:
                        word, advinputs[k, indices[k][t]] = transformer.transform(
                            args.transformer)(
                                advinputs[k, indices[k][t]].item(),
                                word_index,
                                index2word,
                                top_words=args.dictionarysize)
                        wtmp[k][indices[k][t]] = word
                        j += 1
                    t += 1
        else:
            # Replace the top args.power words unconditionally.
            for k in range(inputs.size()[0]):
                for i in range(args.power):
                    word, advinputs[k, indices[k][i]] = transformer.transform(
                        args.transformer)(
                            advinputs[k, indices[k][i]].item(),
                            word_index,
                            index2word,
                            top_words=args.dictionarysize)
        adv.append(advinputs)
        wordinput.extend(wtmp)
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size())
        corrects += (pred2 == target).sum().item()
    print(wordinput[0])

    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    with open('attack.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' %
                (args.data, args.model, args.scoring, args.transformer,
                 args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples/%s_%d_%s_%s_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save(
        {
            'original': origsamples,
            'wordinput': wordinput,
            'advinputs': advinputs,
            'labels': target
        }, advsamplepath)
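For completeness, reading one of the saved sample files back is straightforward; the dictionary keys come from the torch.save calls above.

saved = torch.load(advsamplepath)
origsamples = saved['original']   # clean inputs
advinputs = saved['advinputs']    # adversarial inputs
labels = saved['labels']          # ground-truth labels
print(advinputs.size(), labels.size())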
Example #5
def attackword(maxbatch=None):
    """Same attack as the previous example, but run over the training loader
    and logged/saved to separate paths."""
    corrects = 0.0
    model.eval()
    wordinput = []
    tgt = []
    adv = []
    origsample = []
    flagstore = True
    for dataid, data in enumerate(train_loader):
        print(dataid)
        if maxbatch is not None and dataid > maxbatch:
            break
        inputs, target = data
        inputs, target = inputs.cuda(), target.cuda()
        origsample.append(inputs)
        tgt.append(target)
        wtmp = []
        output = model(inputs)
        pred = torch.max(output, 1)[1].view(target.size())

        losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)

        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()

        if flagstore:
            # Keep a human-readable copy of each sample's tokens.
            for k in range(inputs.size()[0]):
                wtmp.append([])
                for i in range(inputs.size()[1]):
                    if advinputs[k, i].item() > 3:
                        wtmp[-1].append(index2word[advinputs[k, i].item()])
                    else:
                        wtmp[-1].append('')
            for k in range(inputs.size()[0]):
                j = 0
                t = 0
                while j < args.power and t < inputs.size()[1]:
                    if advinputs[k, indices[k][t]].item() > 3:
                        word, advinputs[k, indices[k][t]] = transformer.transform(
                            args.transformer)(
                                advinputs[k, indices[k][t]].item(),
                                word_index, index2word)
                        wtmp[k][indices[k][t]] = word
                        j += 1
                    t += 1
        else:
            # Replace the top args.power words unconditionally.
            for k in range(inputs.size()[0]):
                for i in range(args.power):
                    word, advinputs[k, indices[k][i]] = transformer.transform(
                        args.transformer)(
                            advinputs[k, indices[k][i]].item(),
                            word_index, index2word)
        adv.append(advinputs)
        wordinput.extend(wtmp)
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size())
        corrects += (pred2 == target).sum().item()
    print(wordinput[0])

    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    with open('attack_train.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' %
                (args.data, args.model, args.scoring, args.transformer,
                 args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples2/%s_%d_%s_%s_%d_train.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save(
        {
            'original': origsamples,
            'wordinput': wordinput,
            'advinputs': advinputs,
            'labels': target
        }, advsamplepath)