from tda.tda_logging import get_logger
import torch
from tda.devices import device
from tda.models import Architecture

logger = get_logger("AdvGen")


# One-hot vector based on scalar
def one_hot(y, num_classes=None):
    # Encode integer class labels y (scalar or 1-d tensor) as one-hot rows.
    if num_classes is None:
        # Infer the class count from the largest label seen.
        # NOTE(review): for 1-d y, y.max(0) already reduces dim 0, so the
        # extra .max() below is redundant — kept as-is.
        classes, _ = y.max(0)
        num_classes = (classes.max() + 1).item()
    if y.dim() > 0:
        y_ = torch.zeros(len(y), num_classes, device=y.device)
    else:
        # NOTE(review): the scalar path allocates on the default device,
        # unlike the vector path which follows y.device — confirm intended.
        # Also, scatter_ on a 2-d target requires a 2-d index; a 0-dim y
        # yields a 1-d index after unsqueeze — verify this path is exercised.
        y_ = torch.zeros(1, num_classes)
    y_.scatter_(1, y.unsqueeze(-1), 1)
    # Move the result to the project-wide device.
    y_ = y_.to(device)
    return y_


def ce_loss(outputs, labels, num_classes=None):
    """
    Cross_entropy loss
    (output = post-softmax output of the model, and label = one-hot)
    """
    labels = one_hot(labels, num_classes=num_classes)
    size = len(outputs)
import typing
import numpy as np
import pickle

from tda.graph import Graph
from tda.tda_logging import get_logger
import typing

logger = get_logger("PersistentDiagrams")

# Largest representable float, used as a "death at infinity" sentinel.
# np.float was a deprecated alias of the builtin float and was removed in
# NumPy 1.24; np.finfo(float) is the documented equivalent.
max_float = np.finfo(float).max

# dionysus is an optional dependency: degrade to Filtration = None so
# callers can feature-detect persistent-homology support.
try:
    from dionysus import Filtration, Simplex, homology_persistence, init_diagrams
except Exception as e:
    # logger.warn is a deprecated alias of logger.warning.
    logger.warning(e)
    Filtration = None

# persim/ripser are optional as well.
# NOTE(review): only persim_sw is reset on failure; Rips stays undefined —
# presumably callers only feature-detect via persim_sw.
try:
    from persim import sliced_wasserstein as persim_sw
    from ripser import Rips
except Exception as e:
    persim_sw = None


def _prepare_edges_for_diagram(edge_list: typing.List):
    """
    Enrich the edge list with the vertex and find their birth date
    """
    timing_by_vertex = dict()
from scipy.sparse import coo_matrix
from torch import Tensor
import typing
import numpy as np

from tda.models.architectures import Architecture
from tda.tda_logging import get_logger

logger = get_logger("GraphComputation")


class Graph(object):
    """Sparse activation graph induced by a network on one input sample."""

    def __init__(self, edge_dict: typing.Dict):
        # Mapping layer_link -> matrix of edge weights for that link.
        self._edge_dict = edge_dict

    @classmethod
    def from_architecture_and_data_point(cls, architecture: Architecture, x: Tensor):
        # Build the graph from the raw per-link edge values the
        # architecture computes for input x.
        raw_edge_dict = architecture.get_graph_values(x)
        # logger.info(f"raw_edge_dict = {raw_edge_dict}")
        edge_dict = dict()
        for layer_link in raw_edge_dict:
            v = raw_edge_dict[layer_link]
            # Keep magnitudes only, rescaled by 10e5 (i.e. 1e6) —
            # presumably to bring values into a convenient range for
            # thresholding; confirm against downstream users.
            v = np.abs(v) * 10e5
            edge_dict[layer_link] = v
        return cls(edge_dict=edge_dict)

    def thresholdize(self, thresholds, low_pass: bool):
        # Apply per-link thresholds to the edge matrices in place.
        for layer_link in self._edge_dict:
            v = self._edge_dict[layer_link]
            # logger.info(f"layer link {layer_link} and shape of v = {v.todense().shape}")
from typing import Optional
from scipy.sparse import coo_matrix, csr_matrix, diags
from torch import nn
import numpy as np

from tda.tda_logging import get_logger

logger = get_logger("Layer")

import torch
from tda.precision import default_tensor_type

torch.set_default_tensor_type(default_tensor_type)


class Layer(object):
    """Base wrapper around an nn.Module tracked by the graph machinery."""

    def __init__(self, func: nn.Module, graph_layer: bool, name: Optional[str] = None):
        # Cast the wrapped module to the project-wide default tensor type.
        self.func = func.type(default_tensor_type)
        # Whether this layer contributes edges to the activation graph.
        self.graph_layer = graph_layer
        # Filled in later by the forward machinery (parent idx -> tensor).
        self._activations = None
        self.matrix = None
        self.name = name

    def build_matrix(self) -> coo_matrix:
        # Subclasses materialize their weight/adjacency matrix here.
        raise NotImplementedError()

    def get_matrix(self):
        # One sparse matrix per parent layer, built from flattened activations.
        ret = dict()
        for parentidx in self._activations:
            activ = self._activations[parentidx].reshape(-1)
            ret[parentidx] = coo_matrix(
import pytest
from functools import reduce

from tda.models import get_deep_model, cifar_lenet
from tda.models.architectures import (
    mnist_mlp,
    Architecture,
    mnist_lenet,
    svhn_lenet,
    cifar_toy_resnet,
    cifar_resnet_1,
)
from tda.dataset.datasets import Dataset
from tda.tda_logging import get_logger

logger = get_logger("test_models")


def test_get_mnist_model():
    # Train (1 epoch, forced retrain) and evaluate the MNIST LeNet end to end.
    # NOTE(review): torch/random/np are used here but not imported in this
    # chunk — presumably imported earlier in the file.
    torch.manual_seed(37)
    random.seed(38)
    np.random.seed(39)
    source_dataset = Dataset("MNIST")
    _, val_acc, test_acc = get_deep_model(
        dataset=source_dataset,
        num_epochs=1,
        architecture=mnist_lenet,
        with_details=True,
        force_retrain=True,
    )
from abc import ABC

import numpy as np

from tda.tda_logging import get_logger

ClassIndex = int

logger = get_logger("CovarianceComputer")


class CovarianceStreamComputer(ABC):
    """
    Helper object to compute covariance matrices and mean of a stream
    of 1d vectors.
    """

    def append(self, x: np.ndarray, clazz: ClassIndex):
        """Feed one vector of the stream, tagged with its class."""
        raise NotImplementedError()

    def mean_per_class(self, y: ClassIndex) -> np.ndarray:
        """Mean vector accumulated so far for class y."""
        raise NotImplementedError()

    def precision_root(self) -> np.ndarray:
        """A matrix R such that the precision matrix equals R^T @ R."""
        raise NotImplementedError()

    def precision(self) -> np.ndarray:
        """Precision matrix reconstructed from its root."""
        r = self.precision_root()
        return np.transpose(r) @ r
import fwg
import numpy as np
from joblib import Parallel, delayed

from tda.embeddings.persistent_diagrams import (
    sliced_wasserstein_kernel,
    compute_dgm_from_graph,
)
from tda.embeddings.raw_graph import to_sparse_vector
from tda.graph import Graph
from tda.dataset.graph_dataset import DatasetLine
from tda.models import Architecture
from tda.tda_logging import get_logger

logger = get_logger("Embeddings")


# NOTE(review): NamedTuple and Dict are used below but not imported in this
# chunk — presumably `from typing import NamedTuple, Dict` appears earlier.
class Embedding(NamedTuple):
    # Computed embedding payload plus per-step timing information.
    value: object
    time_taken: Dict


class EmbeddingType(object):
    # String constants naming the supported embedding kinds.
    PersistentDiagram = "PersistentDiagram"
    RawGraph = "RawGraph"


class KernelType(object):
    # String constants naming the supported kernels on embeddings.
    Euclidean = "Euclidean"
    RBF = "RBF"
from tda.embeddings import KernelType
from tda.dataset.graph_dataset import DatasetLine
from tda.tda_logging import get_logger
from tda.models import mnist_mlp, Dataset, get_deep_model
from tda.models.architectures import get_architecture, Architecture
from tda.protocol import get_protocolar_datasets, evaluate_embeddings
from tda.covariance import (
    CovarianceStreamComputer,
    NaiveCovarianceStreamComputer,
    LedoitWolfComputer,
    NaiveSVDCovarianceStreamComputer,
    GraphicalLassoComputer,
)

logger = get_logger("Mahalanobis")

# NOTE(review): time, os and NamedTuple are used below but not imported in
# this chunk — presumably imported earlier in the file.
start_time = time.time()

# Plot output directory next to this script; created once at import time.
plot_path = f"{os.path.dirname(os.path.realpath(__file__))}/plots"
if not os.path.exists(plot_path):
    os.mkdir(plot_path)

# Custom types for better readability
LayerIndex = int
ClassIndex = int


class Config(NamedTuple):
    # Number of epochs for the model
    epochs: int
import mlflow
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM, SVC
from sklearn.utils import check_random_state

from tda.dataset.adversarial_generation import AttackBackend
from tda.embeddings import get_gram_matrix
from tda.dataset.graph_dataset import get_sample_dataset
from tda.models import Architecture, Dataset
from tda.tda_logging import get_logger

logger = get_logger("C3PO")


def get_protocolar_datasets(
    noise: float,
    dataset: Dataset,
    succ_adv: bool,
    archi: Architecture,
    dataset_size: int,
    attack_type: str,
    all_epsilons: typing.List,
    attack_backend: str = AttackBackend.FOOLBOX,
    compute_graph: bool = False,
    transfered_attacks: bool = False,
):
    """Build the clean/noisy/adversarial dataset splits used by the protocol."""
    # NOTE(review): typing is used in the signature but not imported in this
    # chunk — presumably imported earlier in the file.
    logger.info("I will produce for you the protocolar datasets !")
from .layer import Layer
from torch import nn
from functools import reduce
from numba import njit
import numpy as np
from scipy.sparse import coo_matrix, bmat as sparse_bmat

from tda.tda_logging import get_logger

import torch
from tda.precision import default_tensor_type

torch.set_default_tensor_type(default_tensor_type)

logger = get_logger("ConvLayer")


class ConvLayer(Layer):
    """Convolutional layer wrapper for the activation-graph machinery."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        input_shape=None,  # spatial shape of the input, when known up front
        stride=1,
        padding=0,
        bias=False,
        activ=None,  # optional activation applied after the convolution
        name=None,
        grouped_channels: bool = False,
        p=0.0,  # presumably a dropout probability (0.0 disables) — confirm
    ):
import os

import numpy as np
import torch

from tda.cache import cached
from tda.dataset.adversarial_generation import AttackBackend, adversarial_generation
from tda.devices import device
from tda.graph import Graph
from tda.tda_logging import get_logger
from tda.models.architectures import Architecture, mnist_mlp
from tda.dataset.datasets import Dataset
from tda.rootpath import rootpath
from tda.models import get_deep_model

logger = get_logger("GraphDataset")


def saved_adv_path():
    # Directory where generated adversarial samples are persisted; created
    # on first use. NOTE(review): pathlib and typing are used in this chunk
    # but not imported here — presumably imported earlier in the file.
    directory = f"{rootpath}/saved_adversaries/"
    pathlib.Path(directory).mkdir(exist_ok=True, parents=True)
    return str(directory)


def process_sample(
    sample: typing.Tuple,
    adversarial: bool,
    noise: float = 0,
    epsilon: float = 0,
    model: typing.Optional[Architecture] = None,
    attack_type: str = "FGSM",
import torch
import os

from tda.tda_logging import get_logger

logger = get_logger("Devices")

# Detect CUDA hardware once at import time and expose the chosen torch
# device as a module-level constant.
nb_cuda_devices = torch.cuda.device_count()
logger.info(f"Found {nb_cuda_devices} devices compatible with CUDA")

device = torch.device("cuda") if nb_cuda_devices > 0 else torch.device("cpu")

# The FORCE_CPU environment variable overrides the automatic choice.
if os.environ.get("FORCE_CPU", "0") == "1":
    device = torch.device("cpu")

logger.info(f"Device is {device}")
import inspect
import os
import torch
import pathlib
import socket

from tda.rootpath import rootpath
from tda.tda_logging import get_logger

logger = get_logger("Cache")

# Pick the cache root depending on which machine we are running on.
if os.path.exists("/var/opt/data/user_data"):
    # We are on gpu
    cache_root = f"/var/opt/data/user_data/tda/"
elif "mesos" in socket.gethostname():
    # We are in mozart
    cache_root = f"{os.environ['HOME']}/tda_cache/"
else:
    # Other cases (local)
    cache_root = f"{rootpath}/cache/"

logger.info(f"Cache root {cache_root}")


def cached(my_func):
    # Decorator caching my_func results on disk under cache_root,
    # one directory per wrapped function.
    arg_spec = inspect.getfullargspec(my_func).args

    def my_func_with_cache(*args, **kw):
        # Fold positional args into kw so the cache key is uniform
        # regardless of call style.
        kw.update({arg_spec[i]: arg for i, arg in enumerate(args)})
        base_path = f"{cache_root}/{my_func.__name__}/"
        pathlib.Path(base_path).mkdir(parents=True, exist_ok=True)
# NOTE(review): transforms, torch, datasets, np, seed, get_logger and device
# are used in this chunk but not imported here — presumably imported earlier
# in the file.
_trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.to(device)),
    transforms.Normalize((0.0, ), (1.0, )),
])

# Same pipeline but forcing single-channel grayscale first.
_trans_BandW = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.to(device)),
    transforms.Normalize((0.0, ), (1.0, )),
])

# Fix the RNG seeds at import time for reproducible dataset generation.
torch.manual_seed(1)
seed(1)

logger = get_logger("Datasets")


class dsetsCircleToy(torch.utils.data.Dataset):
    """Toy 2-class dataset: two concentric noisy circles rescaled into [0, 1]^2."""

    def __init__(self, n_samples=5000, noise=0.05, factor=0.5):
        X_, Y_ = datasets.make_circles(n_samples=n_samples, shuffle=True, noise=noise, factor=factor)
        # Shift/scale points from roughly [-1.3, 1.3] into the unit square.
        X_ = [(x_ + 1.3) / 2.6 for x_ in X_]
        Y__ = np.reshape(Y_, len(Y_))
        self.X = torch.tensor(X_, dtype=torch.float)
        self.Y = torch.tensor(Y__, dtype=torch.long)
        self.n_samples = n_samples

    def __len__(self):
import torch
from sklearn.metrics.pairwise import euclidean_distances
import mlflow

from tda.dataset.adversarial_generation import AttackType
from tda.dataset.graph_dataset import DatasetLine
from tda.embeddings import KernelType
from tda.models import Dataset, get_deep_model, mnist_lenet
from tda.models.layers import SoftMaxLayer
from tda.models.architectures import get_architecture, Architecture
from tda.protocol import get_protocolar_datasets, evaluate_embeddings
from tda.tda_logging import get_logger

mlflow.set_experiment("tda_adv_detection")

logger = get_logger("LID")

# NOTE(review): time, os and NamedTuple are used below but not imported in
# this chunk — presumably imported earlier in the file.
start_time = time.time()

# Plot output directory next to this script; created once at import time.
plot_path = f"{os.path.dirname(os.path.realpath(__file__))}/plots"
if not os.path.exists(plot_path):
    os.mkdir(plot_path)


class Config(NamedTuple):
    # Noise to consider for the noisy samples
    noise: float
    # Number of epochs for the model
    epochs: int
    # Dataset we consider (MNIST, SVHN)
    dataset: str
from tda.embeddings import (
    get_embedding,
    EmbeddingType,
    KernelType,
    ThresholdStrategy,
    Embedding,
)
from tda.embeddings.raw_graph import identify_active_indices, featurize_vectors
from tda.graph_stats import get_quantiles_helpers
from tda.models import get_deep_model, Dataset
from tda.models.architectures import mnist_mlp, get_architecture
from tda.protocol import get_protocolar_datasets, evaluate_embeddings
from tda.tda_logging import get_logger
from tda.threshold_underoptimized_edges import process_thresholds_underopt

logger = get_logger("Detector")

# NOTE(review): time, mlflow and NamedTuple are used below but not imported
# in this chunk — presumably imported earlier in the file.
start_time = time.time()

mlflow.set_experiment("tda_adv_detection")


class Config(NamedTuple):
    # Type of embedding to use
    embedding_type: str
    # Type of kernel to use on the embeddings
    kernel_type: str
    # High threshold for the edges of the activation graph
    thresholds: str
    # Which thresholding strategy should we use
    threshold_strategy: str
    # Noise to consider for the noisy samples
import os
import time
from typing import Dict, Tuple
from functools import reduce

import numpy as np
import torch
import mlflow
from numpy.random import Generator, PCG64

from tda.embeddings import ThresholdStrategy
from tda.models import Architecture
from tda.models.layers import ConvLayer
from tda.tda_logging import get_logger

logger = get_logger("Thresholds Underoptimized")


def _process_raw_quantiles(raw_quantiles: str) -> Dict[int, Tuple]:
    """
    Parse a spec like "0:0.1:0.9_2:0.05:0.95" into
    {layer_idx: (low, up)} and log each bound to mlflow.
    """
    ret = dict()
    for raw_quantile in raw_quantiles.split("_"):
        # Each entry is "layer_idx:low_quantile:up_quantile".
        layer_idx, value_low, value_up = raw_quantile.split(":")
        ret[int(layer_idx)] = (float(value_low), float(value_up))
        mlflow.log_metric(f"edges_quant_low_{int(layer_idx)}", float(value_low))
        mlflow.log_metric(f"edges_quant_up_{int(layer_idx)}", float(value_up))
    return ret


def underopt_edges(quantiles: Dict, method: str, model: Architecture,
    svhn_resnet_1,
    toy_mlp,
    toy_mlp2,
    toy_mlp3,
    toy_mlp4,
    efficientnet,
)
from tda.dataset.datasets import Dataset
from tda.models.layers import ConvLayer, LinearLayer
from tda.rootpath import rootpath
from tda.tda_logging import get_logger
from tda.precision import default_tensor_type

# NOTE(review): this chunk starts inside an import list opened earlier in
# the file; torch, mlflow, pathlib and _LRScheduler are also imported there.
torch.set_default_tensor_type(default_tensor_type)

logger = get_logger("Models")

mlflow.set_experiment("tda_adv_detection")

pathlib.Path("/tmp/tda/trained_models").mkdir(parents=True, exist_ok=True)


class GradualWarmupScheduler(_LRScheduler):
    """ Gradually warm-up(increasing) learning rate in optimizer.
    Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.
    Args:
        optimizer (Optimizer): Wrapped optimizer.
        multiplier: target learning rate = base lr * multiplier
        total_epoch: target learning rate is reached at total_epoch, gradually
        after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau)
    """
import mlflow
import torch
import matplotlib.pyplot as plt
import numpy as np

from tda.dataset.graph_dataset import get_sample_dataset, AttackBackend
from tda.models import Dataset, get_deep_model
from tda.models.architectures import Architecture
from tda.models.architectures import get_architecture, svhn_lenet
from tda.tda_logging import get_logger
from tda.rootpath import rootpath

# NOTE(review): time and typing are used in this chunk but not imported
# here — presumably imported earlier in the file.
start_time = time.time()

logger = get_logger("GraphStats")

mlflow.set_experiment("tda_adv_detection")


class Config(typing.NamedTuple):
    # Noise to consider for the noisy samples
    noise: float
    # Number of epochs for the model
    epochs: int
    # Dataset we consider (MNIST, SVHN)
    dataset: str
    # Name of the architecture
    architecture: str
    # Noise to be added during the training of the model
    train_noise: float
# from art.classifiers import PyTorchClassifier import foolbox as fb from cached_property import cached_property from tda.devices import device from tda.models.layers import ( Layer, SoftMaxLayer, ) from tda.rootpath import model_dir from tda.tda_logging import get_logger from tda.precision import default_tensor_type torch.set_default_tensor_type(default_tensor_type) logger = get_logger("Architecture") ################# # Architectures # ################# logger = get_logger("Architecture") class Architecture(nn.Module): def __init__( self, layers: List[Layer], preprocess: Callable = None, layer_links: List[Tuple[int, int]] = None, name: str = "", ):