示例#1
0
def distnd_adv(X, X_proj, clf, grid_size, inv_proj=None):
    """Compute a grid of nD distances to the decision boundary via FGSM.

    Every cell of a ``grid_size`` x ``grid_size`` grid over the 2D projection
    is represented by one nD sample: the first original sample that falls in
    the cell or, for cells without original samples, a synthetic sample
    obtained by inverse-projecting the cell centre. For each representative
    an adversarial example is searched with foolbox's FGSM attack and the
    Euclidean norm of the perturbation is stored. When the attack returns
    ``None`` the distance is instead obtained with ``dist_nd_bisection``
    towards the sample returned by ``closest_diff_label``.

    Args:
        X: array of shape (num_samples, num_features) with the nD data.
        X_proj: array of shape (num_samples, 2) with the 2D projection of
            ``X``. Cell centres are generated in [0, 1], so the projection
            is presumably normalised to that range -- TODO confirm.
        clf: classifier wrapper exposing ``Predict``, the underlying Keras
            model as ``clf.clf`` and the model input shape as ``clf.shape``.
        grid_size: number of rows and columns of the grid.
        inv_proj: fitted inverse projection exposing
            ``transform(points, normalize=True)``. When ``None`` an ``ILAMP``
            instance is created and fitted on ``(X, X_proj)``.

    Returns:
        Array of shape (grid_size, grid_size). Cells for which no distance
        could be computed are left at 0.
    """
    import time

    if inv_proj is None:
        inv_proj = ILAMP()
        inv_proj.fit(X, X_proj)

    cells_orig = build_grid(X_proj, grid_size)

    num_features = X.shape[1]
    # list of samples generated by inverse projection
    invproj_samples = []
    # 2D points used to create back projection
    syn_proj = []

    print("generating inverse projection samples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            # Only cells without any original sample need a synthetic one.
            if len(cells_orig[row][col]) > 0:
                continue

            # Centre of the cell in projection coordinates.
            coords = np.array([(col + 0.5) / grid_size,
                               (row + 0.5) / grid_size])
            sample = inv_proj.transform([coords], normalize=True)[0]
            invproj_samples.append(sample)
            syn_proj.append(coords)
    print("\ttime: ", time.time() - s)

    num_syn = len(invproj_samples)
    num_orig = X.shape[0]
    num_total = num_orig + num_syn

    # Original samples first, synthetic samples after them.
    X_all = np.zeros((num_total, num_features))
    X_all[:num_orig] = X
    X_proj_all = np.zeros((num_total, 2))
    X_proj_all[:num_orig] = X_proj
    # When every cell already holds an original sample there is nothing to
    # append; np.array([]) has shape (0,) and cannot be broadcast into the
    # empty (0, num_features) slice, so the assignment must be skipped.
    if num_syn > 0:
        X_all[num_orig:] = np.array(invproj_samples)
        X_proj_all[num_orig:] = np.array(syn_proj)
    cells = build_grid(X_proj_all, grid_size)

    print("predicting all samples")
    s = time.time()
    y_all = clf.Predict(X_all)
    print("\ttime: ", time.time() - s)

    # foolbox model
    # TODO: compute bounds from X_min and X_max
    # TODO: make CLF class compute the adversarial model
    model = foolbox.models.KerasModel(clf.clf, bounds=(0.0, 1.0))
    attack = foolbox.attacks.FGSM(model)

    print("constructing annoy structure")
    s = time.time()
    # NOTE(review): no metric is passed, so annoy uses its default metric --
    # confirm this is the intended one.
    t = annoy.AnnoyIndex(num_features)
    for i in range(num_total):
        t.add_item(i, X_all[i])
    # At least one tree is required: with fewer than 1000 samples the
    # integer division yields 0 and the index would have nothing to search.
    t.build(max(1, num_total // 1000))
    print("\ttime: ", time.time() - s)

    dist_nd_adv = np.zeros((grid_size, grid_size))

    print("computing distance to boundary nd by adversarial examples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            # The first sample in the cell is used as its representative.
            sample_idx = cells[row][col][0]
            sample_label = y_all[sample_idx]
            sample = X_all[sample_idx].reshape(clf.shape)

            adversarial = attack(sample, sample_label)
            if adversarial is None:
                # FGSM failed: fall back to bisection towards the closest
                # sample that was predicted with a different label.
                print("adversarial is None: ", row, col, "bisection")
                adv_idx = closest_diff_label(X_all, y_all, sample_idx,
                                             sample_label, t)
                if adv_idx == -1:
                    # No target was found. Indexing X_all[-1] would silently
                    # use the last sample, so leave the cell at 0 instead.
                    print("problem on: ", row, col)
                    continue
                dist_nd_adv[row, col] = dist_nd_bisection(
                    X_all[sample_idx], X_all[adv_idx], clf)
                continue

            adversarial_label = np.argmax(model.predictions(adversarial))
            if sample_label == adversarial_label:
                # The returned example does not change the prediction.
                print("error on: ", row, col)
                continue
            dist_nd_adv[row, col] = np.linalg.norm(sample - adversarial)

    print("\ttime: ", time.time() - s)
    return dist_nd_adv
示例#2
0
def distnd2(X, X_proj, clf, grid_size, inv_proj=None):
    """Compute a grid of nD distances to the closest differently-labelled sample.

    Every cell of a ``grid_size`` x ``grid_size`` grid over the 2D projection
    is represented by one nD sample: the first original sample that falls in
    the cell or, for cells without original samples, a synthetic sample
    obtained by inverse-projecting the cell centre. For each representative,
    its approximate nearest neighbours (annoy) are scanned in order until one
    with a different predicted label is found; the stored value is the result
    of ``dist_nd_bisection`` between the representative and that neighbour.

    Args:
        X: array of shape (num_samples, num_features) with the nD data.
        X_proj: array of shape (num_samples, 2) with the 2D projection of
            ``X``. Cell centres are generated in [0, 1], so the projection
            is presumably normalised to that range -- TODO confirm.
        clf: classifier wrapper exposing ``Predict``; it is also forwarded
            to ``dist_nd_bisection``.
        grid_size: number of rows and columns of the grid.
        inv_proj: fitted inverse projection exposing
            ``transform(points, normalize=True)``. When ``None`` an ``ILAMP``
            instance is created and fitted on ``(X, X_proj)``.

    Returns:
        Array of shape (grid_size, grid_size). Cells for which no neighbour
        with a different label was found are left at 0.
    """
    import time

    if inv_proj is None:
        inv_proj = ILAMP()
        inv_proj.fit(X, X_proj)

    cells_orig = build_grid(X_proj, grid_size)

    num_features = X.shape[1]
    # nD samples generated by the inverse projection and their 2D positions
    ilamp_samples = []
    ilamp_proj = []

    print("generating ilamp samples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            # Only cells without any original sample need a synthetic one.
            if len(cells_orig[row][col]) == 0:
                # Centre of the cell in projection coordinates.
                coords = np.array([(col + 0.5) / grid_size,
                                   (row + 0.5) / grid_size])
                sample = inv_proj.transform([coords], normalize=True)[0]
                ilamp_samples.append(sample)
                ilamp_proj.append(coords)
    print("\ttime: ", time.time() - s)

    num_syn = len(ilamp_samples)
    num_orig = X.shape[0]
    num_total = num_orig + num_syn

    # Original samples first, synthetic samples after them.
    X_all = np.zeros((num_total, num_features))
    X_all[:num_orig] = X
    X_proj_all = np.zeros((num_total, 2))
    X_proj_all[:num_orig] = X_proj
    # When every cell already holds an original sample there is nothing to
    # append; np.array([]) has shape (0,) and cannot be broadcast into the
    # empty (0, num_features) slice, so the assignment must be skipped.
    if num_syn > 0:
        X_all[num_orig:] = np.array(ilamp_samples)
        X_proj_all[num_orig:] = np.array(ilamp_proj)

    print("predicting all samples")
    s = time.time()
    y_all = clf.Predict(X_all)
    print("\ttime: ", time.time() - s)

    dist_nd_2 = np.zeros((grid_size, grid_size))
    cells = build_grid(X_proj_all, grid_size)

    print("constructing annoy structure")
    s = time.time()
    # NOTE(review): no metric is passed, so annoy uses its default metric --
    # confirm this is the intended one.
    t = annoy.AnnoyIndex(num_features)
    for i in range(num_total):
        t.add_item(i, X_all[i])
    # At least one tree is required: with fewer than 1000 samples the
    # integer division yields 0 and the index would have nothing to search.
    t.build(max(1, num_total // 1000))
    print("\ttime: ", time.time() - s)

    # Neighbours are requested in growing batches of this size.
    batch_size = 1000

    print("computing distances between nd samples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            # The first sample in the cell is used as its representative.
            sample_idx = cells[row][col][0]
            sample = X_all[sample_idx]
            label_sample = y_all[sample_idx]

            target_idx = -1
            num_n_prev = 0
            # Widen the neighbourhood until a differently-labelled sample
            # shows up. The search is bounded by the number of indexed
            # samples so it terminates even when no such sample exists.
            while target_idx == -1 and num_n_prev < num_total:
                num_n = num_n_prev + batch_size
                nns = t.get_nns_by_item(sample_idx, num_n)
                # Only inspect the neighbours added by this wider query.
                for nn in nns[num_n_prev:num_n]:
                    if y_all[nn] != label_sample:
                        target_idx = nn
                        break
                num_n_prev = num_n

            if target_idx == -1:
                # Indexing X_all[-1] would silently use the last sample, so
                # leave the cell at 0 instead.
                print("error on pixel ", row, col)
                continue
            dist_nd_2[row, col] = dist_nd_bisection(sample, X_all[target_idx],
                                                    clf)

    print("\ttime: ", time.time() - s)
    return dist_nd_2