def score(self, X: np.ndarray, batch_size: int = 64, return_predictions: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Compute adversarial scores.

    Parameters
    ----------
    X
        Batch of instances to analyze.
    batch_size
        Batch size used when computing scores.
    return_predictions
        Whether to return the predictions of the classifier on the original and reconstructed instances.

    Returns
    -------
    Array with adversarial scores for each instance in the batch. If `return_predictions` equals True,
    the classifier predictions on the original and reconstructed instances are returned as well.
    """
    # reconstructed instances
    X_recon = predict_batch(self.ae, X, batch_size=batch_size)

    # model predictions
    y = predict_batch(self.model, X, batch_size=batch_size, proba=True)
    y_recon = predict_batch(self.model, X_recon, batch_size=batch_size, proba=True)

    # scale predictions
    if self.temperature != 1.:
        y = y ** (1 / self.temperature)
        y = y / tf.reshape(tf.reduce_sum(y, axis=-1), (-1, 1))

    adv_score = kld(y, y_recon).numpy()

    # hidden layer predictions
    if isinstance(self.model_hl, list):
        for m, w in zip(self.model_hl, self.w_model_hl):
            h = predict_batch(m, X, batch_size=batch_size, proba=True)
            h_recon = predict_batch(m, X_recon, batch_size=batch_size, proba=True)
            adv_score += w * kld(h, h_recon).numpy()

    if return_predictions:
        return adv_score, y, y_recon
    else:
        return adv_score
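# --- Usage sketch (not part of the library): how the adversarial score above behaves on
# plain probability arrays. `kld` is assumed to be tf.keras.losses.kld, as used in this
# module; the toy arrays below are made up for illustration.
import numpy as np
import tensorflow as tf
from tensorflow.keras.losses import kld

y = np.array([[.9, .05, .05], [.3, .4, .3]], dtype=np.float32)      # classifier on X
y_recon = np.array([[.2, .4, .4], [.3, .4, .3]], dtype=np.float32)  # classifier on AE(X)

temperature = .5
y_t = y ** (1 / temperature)                                 # sharpen predictions with T < 1
y_t = y_t / tf.reshape(tf.reduce_sum(y_t, axis=-1), (-1, 1))  # renormalize to probabilities

adv_score = kld(y_t, y_recon).numpy()
print(adv_score)  # first instance scores much higher: its prediction flipped after reconstruction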
def preprocess_drift(X: np.ndarray, model: tf.keras.Model = None, tokenizer=None,
                     max_len: int = None, batch_size: int = int(1e10)) -> np.ndarray:
    """
    Prediction function used for the preprocessing step of a drift detector.

    Parameters
    ----------
    X
        Batch of instances.
    model
        Model used for preprocessing.
    tokenizer
        Optional tokenizer for text drift.
    max_len
        Optional max token length for text drift.
    batch_size
        Batch size.

    Returns
    -------
    Numpy array with predictions.
    """
    if tokenizer is None:
        return predict_batch(model, X, batch_size=batch_size)
    else:
        return predict_batch_transformer(model, tokenizer, X, max_len, batch_size=batch_size)
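# --- Usage sketch (not part of the library): `preprocess_drift` takes extra keyword
# arguments, so it is typically bound with `functools.partial` to obtain the single-argument
# preprocessing function a drift detector expects. The small encoder below is a stand-in;
# any tf.keras.Model works.
from functools import partial

import numpy as np
import tensorflow as tf

encoder = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(32,)),
    tf.keras.layers.Dense(8)
])
preprocess_fn = partial(preprocess_drift, model=encoder, batch_size=128)

X = np.random.randn(100, 32).astype(np.float32)
X_proj = preprocess_fn(X)  # (100, 8) array of encoder outputs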
def score(self, X: np.ndarray, outlier_perc: float = 100., batch_size: int = int(1e10)) \
        -> Tuple[np.ndarray, np.ndarray]:
    """
    Compute feature and instance level outlier scores.

    Parameters
    ----------
    X
        Univariate or multivariate time series.
    outlier_perc
        Percentage of sorted feature level outlier scores used to predict instance level outlier.
    batch_size
        Batch size used when making predictions with the seq2seq model.

    Returns
    -------
    Feature and instance level outlier scores.
    """
    # use the seq2seq model to reconstruct instances
    orig_shape = X.shape
    if len(orig_shape) == 2:
        X = X.reshape(self.shape)
    X_recon, threshold_est = predict_batch(self.seq2seq.decode_seq, X, batch_size=batch_size)
    if len(orig_shape) == 2:  # reshape back to original shape
        X = X.reshape(orig_shape)
        X_recon = X_recon.reshape(orig_shape)
        threshold_est = threshold_est.reshape(orig_shape)

    # compute feature and instance level scores
    fscore = self.feature_score(X, X_recon, threshold_est)
    iscore = self.instance_score(fscore, outlier_perc=outlier_perc)
    return fscore, iscore
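# --- Illustration (hypothetical helper, not the library's `instance_score`): one way to
# aggregate feature level scores into an instance level score is to average the top
# `outlier_perc` percent of sorted feature scores; with `outlier_perc=100.` this reduces
# to a plain mean over all features.
import numpy as np

def instance_score_sketch(fscore: np.ndarray, outlier_perc: float = 100.) -> np.ndarray:
    fscore_flat = fscore.reshape(fscore.shape[0], -1)          # (batch, n_features)
    n_keep = int(np.ceil(fscore_flat.shape[1] * outlier_perc / 100.))
    sorted_scores = np.sort(fscore_flat, axis=1)[:, -n_keep:]  # keep the highest scores
    return np.mean(sorted_scores, axis=1)

fscore = np.random.rand(4, 10, 3)  # e.g. (batch, seq_len, n_features)
print(instance_score_sketch(fscore, outlier_perc=20.))  # one score per instance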
def score(self, X: np.ndarray, batch_size: int = int(1e10)) -> np.ndarray:
    """
    Compute outlier scores.

    Parameters
    ----------
    X
        Batch of instances to analyze.
    batch_size
        Batch size used when making predictions with the VAEGMM.

    Returns
    -------
    Array with outlier scores for each instance in the batch.
    """
    # draw samples from latent space
    X_samples = np.repeat(X, self.samples, axis=0)
    _, z, _ = predict_batch(self.vaegmm, X_samples, batch_size=batch_size)

    # compute average energy for samples
    energy, _ = gmm_energy(z, self.phi, self.mu, self.cov, self.L, self.log_det_cov,
                           return_mean=False)
    energy_samples = energy.numpy().reshape((-1, self.samples))
    iscore = np.mean(energy_samples, axis=-1)
    return iscore
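# --- Illustration (toy numbers): the np.repeat / reshape pattern used above to average a
# per-sample quantity over `samples` latent draws per instance.
import numpy as np

samples = 3
X = np.array([[1.], [2.]])                     # 2 instances
X_samples = np.repeat(X, samples, axis=0)      # shape (6, 1): each instance repeated 3x
energy = np.array([.1, .2, .3, 1., 1.1, 1.2])  # stand-in for the per-sample GMM energies
iscore = energy.reshape((-1, samples)).mean(axis=-1)
print(iscore)  # [0.2 1.1] -> one averaged score per original instance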
def score(self, X: np.ndarray, outlier_perc: float = 100., batch_size: int = int(1e10)) \
        -> Tuple[np.ndarray, np.ndarray]:
    """
    Compute feature and instance level outlier scores.

    Parameters
    ----------
    X
        Batch of instances.
    outlier_perc
        Percentage of sorted feature level outlier scores used to predict instance level outlier.
    batch_size
        Batch size used when making predictions with the VAE.

    Returns
    -------
    Feature and instance level outlier scores.
    """
    # sample reconstructed instances
    X_samples = np.repeat(X, self.samples, axis=0)
    X_recon = predict_batch(self.vae, X_samples, batch_size=batch_size)

    # compute feature and instance level scores
    fscore = self.feature_score(X_samples, X_recon)
    iscore = self.instance_score(fscore, outlier_perc=outlier_perc)
    return fscore, iscore
def hidden_output(X: np.ndarray, model: tf.keras.Model = None, layer: int = -1,
                  input_shape: tuple = None, batch_size: int = int(1e10)) -> np.ndarray:
    """
    Return hidden layer output from a model on a batch of instances.

    Parameters
    ----------
    X
        Batch of instances.
    model
        tf.keras.Model.
    layer
        Hidden layer of model to use as output. The default of -1 refers to the last layer,
        which is typically the softmax layer.
    input_shape
        Optional input layer shape.
    batch_size
        Batch size used for the model predictions.

    Returns
    -------
    Model predictions using the specified hidden layer as output layer.
    """
    if input_shape and not model.inputs:
        inputs = Input(shape=input_shape)
        model.call(inputs)
    else:
        inputs = model.inputs
    hidden_model = Model(inputs=inputs, outputs=model.layers[layer].output)
    X_hidden = predict_batch(hidden_model, X, batch_size=batch_size)
    return X_hidden
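# --- Usage sketch (not part of the library): extracting penultimate-layer features from a
# classifier with `hidden_output`. The small model below is a stand-in for any tf.keras
# classifier.
import numpy as np
import tensorflow as tf

clf = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(16,)),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])
X = np.random.randn(5, 16).astype(np.float32)
feats = hidden_output(X, model=clf, layer=-2)  # (5, 8) activations of the ReLU layer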
def score(self, X: np.ndarray, batch_size: int = int(1e10), return_predictions: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Compute adversarial scores.

    Parameters
    ----------
    X
        Batch of instances to analyze.
    batch_size
        Batch size used when computing scores.
    return_predictions
        Whether to return the predictions of the classifier and the distilled model
        on the original instances.

    Returns
    -------
    Array with adversarial scores for each instance in the batch. If `return_predictions` equals True,
    the classifier and distilled model predictions are returned as well.
    """
    # model predictions
    y = predict_batch(self.model, X, batch_size=batch_size, proba=True)
    y_distilled = predict_batch(self.distilled_model, X, batch_size=batch_size, proba=True)

    # scale predictions
    if self.temperature != 1.:
        y = y ** (1 / self.temperature)  # type: ignore
        y = (y / tf.reshape(tf.reduce_sum(y, axis=-1), (-1, 1))).numpy()

    if self.loss_type == 'kld':
        score = kld(y, y_distilled).numpy()
    elif self.loss_type == 'xent':
        score = categorical_crossentropy(y, y_distilled).numpy()
    else:
        raise NotImplementedError

    if return_predictions:
        return score, y, y_distilled
    else:
        return score
def test_predict_batch(update_predict_batch):
    model, proba, return_class, shape = update_predict_batch
    preds = predict_batch(model, X, proba=proba, return_class=return_class, shape=shape)
    if isinstance(model, AE):
        assert preds.shape == X.shape
    elif isinstance(model, tf.keras.Model) and proba:
        assert preds.shape == (n, n_classes)
    elif isinstance(model, tf.keras.Model) and not proba and return_class:
        assert preds.shape == (n,)
    elif isinstance(model, tf.keras.Model) and shape:
        assert preds.shape == shape
def uae(X: np.ndarray, encoder_net: tf.keras.Sequential = None, enc_dim: int = None,
        batch_size: int = int(1e10)) -> np.ndarray:
    """
    Dimensionality reduction with an untrained autoencoder.

    Parameters
    ----------
    X
        Batch of instances.
    encoder_net
        Encoder network as a tf.keras.Sequential model.
    enc_dim
        Alternatively, only the dimension of the encoding can be provided and
        a default network with 2 hidden layers is constructed.
    batch_size
        Batch size used when making predictions with the autoencoder.

    Returns
    -------
    Encoded batch of instances.
    """
    is_tf_seq = isinstance(encoder_net, tf.keras.Sequential)
    is_enc_dim = isinstance(enc_dim, int)
    if not is_tf_seq and is_enc_dim:  # set default encoder
        input_dim = np.prod(X.shape[1:])
        step_dim = int((input_dim - enc_dim) / 3)
        encoder_net = tf.keras.Sequential(
            [
                InputLayer(input_shape=X.shape[1:]),
                Flatten(),
                Dense(enc_dim + 2 * step_dim, activation=tf.nn.relu),
                Dense(enc_dim + step_dim, activation=tf.nn.relu),
                Dense(enc_dim, activation=None)
            ]
        )
    elif not is_tf_seq and not is_enc_dim:
        raise ValueError('Need to provide either `enc_dim` or a tf.keras.Sequential `encoder_net`.')
    enc = EncoderAE(encoder_net)
    X_enc = predict_batch(enc, X, batch_size=batch_size)
    return X_enc
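# --- Worked example (toy shapes): for a flattened MNIST-like input, the default encoder
# built above interpolates linearly between the input and encoding dimension in three steps:
# input_dim = 28 * 28 * 1 = 784, enc_dim = 32 -> step_dim = int((784 - 32) / 3) = 250,
# giving Dense layers of size 532 (= 32 + 2 * 250), 282 (= 32 + 250) and 32.
import numpy as np

X = np.random.rand(64, 28, 28, 1).astype(np.float32)
X_enc = uae(X, enc_dim=32)  # (64, 32) random projection from the untrained encoder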
def logp(self, dist, X: np.ndarray, return_per_feature: bool = False,
         batch_size: int = int(1e10)) -> np.ndarray:
    """
    Compute log probability of a batch of instances under the generative model.

    Parameters
    ----------
    dist
        Distribution of the model.
    X
        Batch of instances.
    return_per_feature
        Return log probability per feature.
    batch_size
        Batch size for the generative model evaluations.

    Returns
    -------
    Log probabilities.
    """
    logp_fn = partial(dist.log_prob, return_per_feature=return_per_feature)
    return predict_batch(logp_fn, X, batch_size=batch_size)
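# --- Illustration: `partial` fixes `return_per_feature` so that `logp_fn` matches the
# single-argument callable `predict_batch` expects. A minimal standalone analogue with a
# made-up log density:
from functools import partial

import numpy as np

def log_prob(x: np.ndarray, return_per_feature: bool = False) -> np.ndarray:
    lp = -0.5 * x ** 2  # stand-in per-feature log density
    return lp if return_per_feature else lp.sum(axis=-1)

logp_fn = partial(log_prob, return_per_feature=True)
print(logp_fn(np.ones((2, 3))).shape)  # (2, 3): per-feature log probabilities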
def logp_alt(self, model: tf.keras.Model, X: np.ndarray, return_per_feature: bool = False,
             batch_size: int = int(1e10)) -> np.ndarray:
    """
    Compute log probability of a batch of instances using the log_prob function
    defined by the user.

    Parameters
    ----------
    model
        Trained model.
    X
        Batch of instances.
    return_per_feature
        Return log probability per feature.
    batch_size
        Batch size for the generative model evaluations.

    Returns
    -------
    Log probabilities.
    """
    if self.sequential:
        y, X = X[:, 1:], X[:, :-1]
    else:
        y = X.copy()
    y_preds = predict_batch(model, X, batch_size=batch_size)
    logp = self.log_prob(y, y_preds).numpy()
    if return_per_feature:
        return logp
    else:
        axis = tuple(np.arange(len(logp.shape))[1:])
        return np.mean(logp, axis=axis)
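# --- Illustration (toy data): the sequential branch above turns a batch of sequences into
# next-step (input, target) pairs by shifting one position.
import numpy as np

X = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8]])
y, X_in = X[:, 1:], X[:, :-1]
# X_in[i, t] is the input at step t and y[i, t] the value the model should predict next:
# X_in = [[1 2 3], [5 6 7]], y = [[2 3 4], [6 7 8]]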
def score(self, X: np.ndarray, batch_size: int = int(1e10)) -> np.ndarray:
    """
    Compute outlier scores.

    Parameters
    ----------
    X
        Batch of instances to analyze.
    batch_size
        Batch size used when making predictions with the AEGMM.

    Returns
    -------
    Array with outlier scores for each instance in the batch.
    """
    _, z, _ = predict_batch(self.aegmm, X, batch_size=batch_size)
    energy, _ = gmm_energy(z, self.phi, self.mu, self.cov, self.L, self.log_det_cov,
                           return_mean=False)
    return energy.numpy()
def fit(self,
        X: np.ndarray,
        mutate_fn: Callable = mutate_categorical,
        mutate_fn_kwargs: dict = {'rate': .2, 'seed': 0, 'feature_range': (0, 255)},
        mutate_batch_size: int = int(1e10),
        loss_fn: tf.keras.losses = None,
        loss_fn_kwargs: dict = None,
        optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(learning_rate=1e-3),
        epochs: int = 20,
        batch_size: int = 64,
        verbose: bool = True,
        log_metric: Tuple[str, "tf.keras.metrics"] = None,
        callbacks: tf.keras.callbacks = None) -> None:
    """
    Train semantic and background generative models.

    Parameters
    ----------
    X
        Training batch.
    mutate_fn
        Mutation function used to generate the background dataset.
    mutate_fn_kwargs
        Kwargs for the mutation function used to generate the background dataset.
        Default values set for an image dataset.
    mutate_batch_size
        Batch size used to generate the mutations for the background dataset.
    loss_fn
        Loss function used for training.
    loss_fn_kwargs
        Kwargs for the loss function.
    optimizer
        Optimizer used for training.
    epochs
        Number of training epochs.
    batch_size
        Batch size used for training.
    verbose
        Whether to print training progress.
    log_metric
        Additional metrics whose progress will be displayed if verbose equals True.
    callbacks
        Callbacks used during training.
    """
    input_shape = X.shape[1:]

    # training arguments
    kwargs = {'epochs': epochs, 'batch_size': batch_size, 'verbose': verbose, 'callbacks': callbacks}

    # create background data
    mutate_fn = partial(mutate_fn, **mutate_fn_kwargs)
    X_back = predict_batch(mutate_fn, X, batch_size=mutate_batch_size, shape=X.shape, dtype=X.dtype)

    # prepare sequential data
    if self.sequential and not self.has_log_prob:
        y, y_back = X[:, 1:], X_back[:, 1:]  # type: ignore
        X, X_back = X[:, :-1], X_back[:, :-1]  # type: ignore
    else:
        y, y_back = None, None

    # check if model needs to be built
    use_build = self.has_log_prob and not isinstance(self.dist_s, tf.keras.Model)

    if use_build:
        # build and train semantic model
        self.model_s = build_model(self.dist_s, input_shape)[0]
        self.model_s.compile(optimizer=optimizer)
        self.model_s.fit(X, **kwargs)
        # build and train background model
        self.model_b = build_model(self.dist_b, input_shape)[0]
        self.model_b.compile(optimizer=optimizer)
        self.model_b.fit(X_back, **kwargs)
    else:
        # update training arguments
        kwargs.update({'optimizer': optimizer, 'loss_fn_kwargs': loss_fn_kwargs, 'log_metric': log_metric})

        # train semantic model
        args = [self.dist_s, loss_fn, X]
        kwargs.update({'y_train': y})
        trainer(*args, **kwargs)

        # train background model
        args = [self.dist_b, loss_fn, X_back]
        kwargs.update({'y_train': y_back})
        trainer(*args, **kwargs)
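# --- Illustration (hypothetical stand-in for `mutate_categorical`, with made-up arguments):
# the background dataset is created by randomly perturbing a fraction `rate` of the
# categorical feature values within `feature_range`, destroying semantic content while
# keeping low-level statistics roughly intact.
import numpy as np

def mutate_sketch(X: np.ndarray, rate: float = .2, seed: int = 0,
                  feature_range: tuple = (0, 255)) -> np.ndarray:
    rng = np.random.default_rng(seed)
    mask = rng.random(X.shape) < rate                                   # which entries to mutate
    noise = rng.integers(feature_range[0], feature_range[1] + 1, size=X.shape)
    return np.where(mask, noise, X)

X = np.zeros((2, 4, 4), dtype=np.int64)
X_back = mutate_sketch(X)  # ~20% of entries replaced by random values in [0, 255]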