def __init__(self, dataarray, only_tri=False, silence_level=0): """ Initialize an instance of CouplingAnalysisPurePython. Possible choices for only_tri: - "True" will calculate only the upper triangle of the coupling matrix, excluding the diagonal, assuming symmetry (not for directed measures) - "False" will calculate the whole matrix (asymmetry comes from different integration ranges) :type dataarray: 4D, 3D or 2D Numpy array [time, level, lat, lon], [time, lat, lon] or [time, index] :arg dataarray: The time series array with time in first dimension :arg bool only_tri: Symmetric/asymmetric assumption on coupling matrix. :arg int silence_level: The inverse level of verbosity of the object. """ # only_tri will calculate the upper triangle excluding the diagonal # only. This assumes stationarity on the time series self.only_tri = only_tri # Set silence level self.silence_level = silence_level # Flatten observable anomaly array along lon/lat dimension to allow # for more convenient indexing and transpose the whole array as this # is faster in loops if numpy.ndim(dataarray) == 4: (self.total_time, n_lev, n_lat, n_lon) = dataarray.shape self.N = n_lev * n_lat * n_lon self.dataarray = numpy.\ fastCopyAndTranspose(dataarray.reshape(-1, self.N)) elif numpy.ndim(dataarray) == 3: (self.total_time, n_lat, n_lon) = dataarray.shape self.N = n_lat * n_lon self.dataarray = numpy.\ fastCopyAndTranspose(dataarray.reshape(-1, self.N)) elif numpy.ndim(dataarray) == 2: (self.total_time, self.N) = dataarray.shape self.dataarray = numpy.fastCopyAndTranspose(dataarray) else: print("irregular array shape...") self.dataarray = numpy.fastCopyAndTranspose(dataarray) # factorials below 10 in a list for permutation patterns self.factorial = \ numpy.array([1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]) self.patternized = False self.has_fft = False self.originalFFT = None # lag_mode dict self.lag_modi = {"all": 0, "sum": 1, "max": 2}
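For reference, a minimal sketch of the flatten-and-transpose step for a 3D (time, lat, lon) field; the toy dimensions are assumptions for illustration, and `a.T.copy()` stands in for the deprecated `numpy.fastCopyAndTranspose` used above:

import numpy as np

# Toy dimensions (assumed for illustration only)
total_time, n_lat, n_lon = 100, 4, 5
field = np.random.rand(total_time, n_lat, n_lon)

# Unravel the spatial grid into a single node index, then transpose so
# that loops run over nodes first, as in __init__ above
N = n_lat * n_lon
dataarray = field.reshape(-1, N).T.copy()
assert dataarray.shape == (N, total_time)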
def _get_train_data(self, parents, target, tau_max): # Build X = lagged past of the target plus lagged parents, Y = present target Y = [(target, 0)] X = [(target, i) for i in range(-1 * tau_max, 0)] X += [(parent, i) for parent, i in it.product(parents, range(-1 * tau_max, 0))] array, xyz, _ = self.dataframe.construct_array( X=X, Y=Y, Z=Y, tau_max=tau_max, return_cleaned_xyz=True, do_checks=False) dim, T = array.shape Xset = np.where(xyz == 0)[0] Yset = np.where(xyz == 1)[0] arrayT = np.fastCopyAndTranspose(array) train_x = arrayT[:, Xset] train_y = arrayT[:, Yset] del arrayT, array return train_x, train_y
def _get_single_residuals(self, array, target_var, standardize=True, return_means=False): """Returns residuals of linear multiple regression. Performs an OLS regression of the variable indexed by target_var on the conditions Z. Here array is assumed to contain X and Y as the first two rows with the remaining rows (if present) containing the conditions Z. Optionally returns the estimated regression line. Parameters ---------- array : array-like data array with X, Y, Z in rows and observations in columns target_var : {0, 1} Variable to regress out conditions from. standardize : bool, optional (default: True) Whether to standardize the array beforehand. Must be used for partial correlation. return_means : bool, optional (default: False) Whether to return the estimated regression line. Returns ------- resid [, mean] : array-like The residual of the regression and optionally the estimated line. """ dim, T = array.shape dim_z = dim - 2 # Standardize if standardize: array -= array.mean(axis=1).reshape(dim, 1) array /= array.std(axis=1).reshape(dim, 1) if np.isnan(array).sum() != 0: raise ValueError("nans after standardizing, " "possibly constant array!") y = array[target_var, :] if dim_z > 0: z = np.fastCopyAndTranspose(array[2:, :]) beta_hat = np.linalg.lstsq(z, y, rcond=None)[0] mean = np.dot(z, beta_hat) resid = y - mean else: resid = y mean = None if return_means: return (resid, mean) return resid
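A standalone check of the regression step on synthetic data may help; the coefficients and noise level below are assumptions for illustration, not part of the original class:

import numpy as np

rng = np.random.default_rng(42)
T = 500
z = rng.standard_normal((2, T))                      # two conditions Z (rows)
y = 0.7 * z[0] - 0.2 * z[1] + 0.05 * rng.standard_normal(T)

# Same OLS step as above: regress y on Z (observations in columns)
beta_hat = np.linalg.lstsq(z.T, y, rcond=None)[0]
resid = y - z.T @ beta_hat

print(beta_hat)       # approximately [0.7, -0.2]
print(resid.std())    # approximately 0.05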
def test_fastCopyAndTranspose(): # 0D array a = np.array(2) b = np.fastCopyAndTranspose(a) assert_equal(b, a.T) assert_(b.flags.owndata) # 1D array a = np.array([3, 2, 7, 0]) b = np.fastCopyAndTranspose(a) assert_equal(b, a.T) assert_(b.flags.owndata) # 2D array a = np.arange(6).reshape(2, 3) b = np.fastCopyAndTranspose(a) assert_equal(b, a.T) assert_(b.flags.owndata)
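Note that np.fastCopyAndTranspose was deprecated in NumPy 1.24 and removed in NumPy 2.0, with arr.T.copy() as the documented replacement. A minimal shim that satisfies the assertions in the test above (the function name is illustrative):

import numpy as np

def fast_copy_and_transpose(a):
    # C-contiguous, owned copy of the transpose -- the behavior the
    # removed np.fastCopyAndTranspose provided
    return np.asarray(a).T.copy(order='C')

a = np.arange(6).reshape(2, 3)
b = fast_copy_and_transpose(a)
assert np.array_equal(b, a.T)
assert b.flags.owndata and b.flags.c_contiguous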
def _cython_calculate_mutual_information(self, anomaly, n_bins=32): """ Calculate the mutual information matrix at zero lag. The cython code is adapted from the Tisean 3.0.1 mutual.c module. :type anomaly: 2D Numpy array (time, index) :arg anomaly: The anomaly time series. :arg int n_bins: The number of bins for estimating probability distributions. :rtype: 2D array (index, index) :return: the mutual information matrix at zero lag. """ if self.silence_level <= 1: print("Calculating mutual information matrix at zero lag from " "anomaly values using cython...") # Normalize anomaly time series to zero mean and unit variance self.data.normalize_time_series_array(anomaly) # Create local transposed copy of anomaly anomaly = np.fastCopyAndTranspose(anomaly) (N, n_samples) = anomaly.shape # Get common range for all histograms range_min = float(anomaly.min()) range_max = float(anomaly.max()) # Rescale all time series to the interval [0,1], # using the maximum range of the whole dataset. scaling = float(1. / (range_max - range_min)) # The cython kernel expects a C-contiguous float32 array anomaly = anomaly.astype('float32').copy(order='c') mi = _calculate_mutual_information_cython(anomaly, n_samples, N, n_bins, scaling, range_min) if self.silence_level <= 1: print("Done!") return mi
def _cython_calculate_mutual_information(self, anomaly, n_bins=32): """ Calculate the mutual information matrix at zero lag. The cython code is adapted from the Tisean 3.0.1 mutual.c module. :type anomaly: 2D Numpy array (time, index) :arg anomaly: The anomaly time series. :arg int n_bins: The number of bins for estimating probability distributions. :rtype: 2D array (index, index) :return: the mutual information matrix at zero lag. """ if self.silence_level <= 1: print("Calculating mutual information matrix at zero lag from " "anomaly values using cython...") # Normalize anomaly time series to zero mean and unit variance self.data.normalize_time_series_array(anomaly) # Create local transposed copy of anomaly anomaly = np.fastCopyAndTranspose(anomaly) (N, n_samples) = anomaly.shape # Get common range for all histograms range_min = float(anomaly.min()) range_max = float(anomaly.max()) # Rescale all time series to the interval [0,1], # using the maximum range of the whole dataset. scaling = 1./(range_max - range_min) mi = _calculate_mutual_information_cython( to_cy(anomaly, FIELD), n_samples, N, n_bins, scaling, range_min) if self.silence_level <= 1: print("Done!") return mi
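The estimator implemented by the cython kernel is a plain binned (histogram) mutual information. A pure-NumPy sketch of the same quantity for a single pair of series, without bias correction (the function name is illustrative):

import numpy as np

def binned_mi(x, y, n_bins=32):
    # Joint histogram -> joint and marginal probabilities
    hist2d, _, _ = np.histogram2d(x, y, bins=n_bins)
    pxy = hist2d / hist2d.sum()
    px = pxy.sum(axis=1, keepdims=True)
    py = pxy.sum(axis=0, keepdims=True)
    # Sum p(x,y) * log(p(x,y) / (p(x) p(y))) over occupied bins
    mask = pxy > 0
    return float((pxy[mask] * np.log(pxy[mask] / (px @ py)[mask])).sum())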
def partial_corr(a): """ Computes partial correlation of array a. Array as dim x time; partial correlation is between first two dimensions, conditioned on others. """ from scipy import linalg, stats array = a.copy() D, T = array.shape if np.isnan(array).sum() != 0: raise ValueError("nans in the array!") # Standardize array -= array.mean(axis=1).reshape(D, 1) array /= array.std(axis=1).reshape(D, 1) if np.isnan(array).sum() != 0: raise ValueError("nans after standardizing, " "possibly constant array!") x = array[0, :] y = array[1, :] if len(array) > 2: confounds = array[2:, :] ortho_confounds = linalg.qr(np.fastCopyAndTranspose(confounds), mode='economic')[0].T x -= np.dot(np.dot(ortho_confounds, x), ortho_confounds) y -= np.dot(np.dot(ortho_confounds, y), ortho_confounds) # The p-value from pearsonr ignores the conditioning; use the # degrees-of-freedom corrected t-test below instead val, _ = stats.pearsonr(x, y) df = float(T - D) if df < 1: raise ValueError("D > T: Not enough degrees of freedom!") # Two-sided p-value accounting for degrees of freedom trafo_val = val * np.sqrt(df / (1. - np.array([val])**2)) pval = stats.t.sf(np.abs(trafo_val), df) * 2 return val, pval
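A usage sketch with a synthetic confounder (the data and coefficients are assumptions for illustration): X and Y are strongly correlated only through Z, so the partial correlation should be near zero. Since partial_corr as written relies on np.fastCopyAndTranspose, this runs on NumPy < 2.0:

import numpy as np

rng = np.random.default_rng(0)
T = 1000
z = rng.standard_normal(T)
x = z + 0.1 * rng.standard_normal(T)
y = z + 0.1 * rng.standard_normal(T)

a = np.vstack([x, y, z])        # rows: X, Y, then the condition Z
val, pval = partial_corr(a)
print(val, pval)                # val near 0, pval large
print(np.corrcoef(x, y)[0, 1])  # raw correlation is near 1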
def get_shuffle_significance(self, array, xyz, value, return_null_dist=False): """Returns p-value for nearest-neighbor shuffle significance test. For non-empty Z, overrides get_shuffle_significance from the parent class, which implements a block shuffle test that does not preserve dependencies of X and Y with Z. Here the parameter shuffle_neighbors is used to permute only those values :math:`x_i` and :math:`x_j` for which :math:`z_j` is among the nearest neighbors of :math:`z_i`. If Z is empty, the block-shuffle test is used. Parameters ---------- array : array-like data array with X, Y, Z in rows and observations in columns xyz : array of ints XYZ identifier array of shape (dim,). value : number Value of test statistic for unshuffled estimate. Returns ------- pval : float p-value """ dim, T = array.shape x_indices = np.where(xyz == 0)[0] z_indices = np.where(xyz == 2)[0] if len(z_indices) > 0 and self.shuffle_neighbors < T: if self.verbosity > 2: print(" nearest-neighbor shuffle significance " "test with n = %d and %d surrogates" % (self.shuffle_neighbors, self.sig_samples)) # Get nearest neighbors around each sample point in Z z_array = np.fastCopyAndTranspose(array[z_indices, :]) tree_xyz = spatial.cKDTree(z_array) neighbors = tree_xyz.query(z_array, k=self.shuffle_neighbors, p=np.inf, eps=0.)[1].astype('int32') null_dist = np.zeros(self.sig_samples) for sam in range(self.sig_samples): # Generate random order in which to go through indices loop in # next step order = self.random_state.permutation(T).astype('int32') # Shuffle neighbor indices for each sample index for i in range(T): self.random_state.shuffle(neighbors[i]) # Select a series of neighbor indices that contains as few as # possible duplicates restricted_permutation = \ tigramite_cython_code._get_restricted_permutation_cython( T=T, shuffle_neighbors=self.shuffle_neighbors, neighbors=neighbors, order=order) array_shuffled = np.copy(array) for i in x_indices: array_shuffled[i] = array[i, restricted_permutation] null_dist[sam] = self.get_dependence_measure( array_shuffled, xyz) else: null_dist = \ self._get_shuffle_dist(array, xyz, self.get_dependence_measure, sig_samples=self.sig_samples, sig_blocklength=self.sig_blocklength, verbosity=self.verbosity) # Sort null_dist.sort() pval = (null_dist >= value).mean() if return_null_dist: return pval, null_dist return pval
def _get_single_residuals(self, array, target_var, return_means=False, standardize=True, return_likelihood=False, training_iter=50, lr=0.1): """Returns residuals of Gaussian process regression. Performs a GP regression of the variable indexed by target_var on the conditions Z. Here array is assumed to contain X and Y as the first two rows with the remaining rows (if present) containing the conditions Z. Optionally returns the estimated mean and the likelihood. Parameters ---------- array : array-like data array with X, Y, Z in rows and observations in columns target_var : {0, 1} Variable to regress out conditions from. standardize : bool, optional (default: True) Whether to standardize the array beforehand. return_means : bool, optional (default: False) Whether to return the estimated regression line. return_likelihood : bool, optional (default: False) Whether to return the log_marginal_likelihood of the fitted GP. training_iter : int, optional (default: 50) Number of training iterations. lr : float, optional (default: 0.1) Learning rate (default: 0.1). Returns ------- resid [, mean, likelihood] : array-like The residual of the regression and optionally the estimated mean and/or the likelihood. """ dim, T = array.shape if dim <= 2: if return_likelihood: return array[target_var, :], -np.inf return array[target_var, :] # Implement using PyTorch # Standardize if standardize: array -= array.mean(axis=1).reshape(dim, 1) array /= array.std(axis=1).reshape(dim, 1) if np.isnan(array).any(): raise ValueError("Nans after standardizing, " "possibly constant array!") target_series = array[target_var, :] z = np.fastCopyAndTranspose(array[2:]) if np.ndim(z) == 1: z = z.reshape(-1, 1) train_x = torch.tensor(z).float() train_y = torch.tensor(target_series).float() device_type = 'cuda' if torch.cuda.is_available() else 'cpu' output_device = torch.device(device_type) train_x, train_y = train_x.to(output_device), train_y.to(output_device) if device_type == 'cuda': # If GPU is available, use MultiGPU with Kernel Partitioning n_devices = torch.cuda.device_count() class mExactGPModel(gpytorch.models.ExactGP): def __init__(self, train_x, train_y, likelihood, n_devices): super(mExactGPModel, self).__init__(train_x, train_y, likelihood) self.mean_module = gpytorch.means.ConstantMean() base_covar_module = gpytorch.kernels.ScaleKernel( gpytorch.kernels.RBFKernel()) self.covar_module = gpytorch.kernels.MultiDeviceKernel( base_covar_module, device_ids=range(n_devices), output_device=output_device) def forward(self, x): mean_x = self.mean_module(x) covar_x = self.covar_module(x) return gpytorch.distributions.MultivariateNormal( mean_x, covar_x) def mtrain( train_x, train_y, n_devices, output_device, checkpoint_size, preconditioner_size, n_training_iter, ): likelihood = gpytorch.likelihoods.GaussianLikelihood().to( output_device) model = mExactGPModel(train_x, train_y, likelihood, n_devices).to(output_device) model.train() likelihood.train() optimizer = FullBatchLBFGS(model.parameters(), lr=lr) # "Loss" for GPs - the marginal log likelihood mll = gpytorch.mlls.ExactMarginalLogLikelihood( likelihood, model) with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \ gpytorch.settings.max_preconditioner_size(preconditioner_size): def closure(): optimizer.zero_grad() output = model(train_x) loss = -mll(output, train_y) return loss loss = closure() loss.backward() for i in range(n_training_iter): options = { 'closure': closure, 'current_loss': loss, 'max_ls': 10 } loss, _, _, _, _, _, _, fail = optimizer.step(options) 
if fail: break return model, likelihood, mll def find_best_gpu_setting(train_x, train_y, n_devices, output_device, preconditioner_size): N = train_x.size(0) # Find the optimum partition/checkpoint size by decreasing in powers of 2 # Start with no partitioning (size = 0) settings = [0] + [ int(n) for n in np.ceil(N / 2**np.arange(1, np.floor(np.log2(N)))) ] for checkpoint_size in settings: print('Number of devices: {} -- Kernel partition size: {}'. format(n_devices, checkpoint_size)) try: # Try a full forward and backward pass with this setting to check memory usage _, _, _ = mtrain( train_x, train_y, n_devices=n_devices, output_device=output_device, checkpoint_size=checkpoint_size, preconditioner_size=preconditioner_size, n_training_iter=1) # When successful, break out of the for-loop; the finally block # still runs and frees GPU memory break except RuntimeError: pass except AttributeError: pass finally: # Handle CUDA OOM errors by moving on to a smaller partition gc.collect() torch.cuda.empty_cache() return checkpoint_size # Set a large enough preconditioner size to reduce the number of CG iterations run preconditioner_size = 100 if self.checkpoint_size is None: self.checkpoint_size = find_best_gpu_setting( train_x, train_y, n_devices=n_devices, output_device=output_device, preconditioner_size=preconditioner_size) model, likelihood, mll = mtrain( train_x, train_y, n_devices=n_devices, output_device=output_device, checkpoint_size=self.checkpoint_size, preconditioner_size=100, n_training_iter=training_iter) # Get into evaluation (predictive posterior) mode model.eval() likelihood.eval() # Make predictions by feeding model through likelihood with torch.no_grad(), gpytorch.settings.fast_pred_var( ), gpytorch.beta_features.checkpoint_kernel(1000): mean = model(train_x).loc.detach() loglik = mll(model(train_x), train_y) * T resid = (train_y - mean).detach().cpu().numpy() mean = mean.detach().cpu().numpy() else: # If only CPU is available, we will use the simplest form of GP model, exact inference class ExactGPModel(gpytorch.models.ExactGP): def __init__(self, train_x, train_y, likelihood): super(ExactGPModel, self).__init__(train_x, train_y, likelihood) self.mean_module = gpytorch.means.ConstantMean() # We only use the RBF kernel here; the WhiteNoiseKernel is deprecated # and its functionality is integrated into the likelihood module.
self.covar_module = gpytorch.kernels.ScaleKernel( gpytorch.kernels.RBFKernel()) def forward(self, x): mean_x = self.mean_module(x) covar_x = self.covar_module(x) return gpytorch.distributions.MultivariateNormal( mean_x, covar_x) # initialize likelihood and model likelihood = gpytorch.likelihoods.GaussianLikelihood() model = ExactGPModel(train_x, train_y, likelihood) # Find optimal model hyperparameters model.train() likelihood.train() # Use the adam optimizer # Includes GaussianLikelihood parameters optimizer = torch.optim.Adam(model.parameters(), lr=lr) # "Loss" for GPs - the marginal log likelihood mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model) for i in range(training_iter): # Zero gradients from previous iteration optimizer.zero_grad() # Output from model output = model(train_x) # Calc loss and backprop gradients loss = -mll(output, train_y) loss.backward() optimizer.step() # Get into evaluation (predictive posterior) mode model.eval() likelihood.eval() # Make predictions by feeding model through likelihood with torch.no_grad(), gpytorch.settings.fast_pred_var(): mean = model(train_x).loc.detach() loglik = mll(model(train_x), train_y) * T resid = (train_y - mean).detach().numpy() mean = mean.detach().numpy() if return_means and not return_likelihood: return resid, mean elif return_likelihood and not return_means: return resid, loglik elif return_means and return_likelihood: return resid, mean, loglik return resid
def train(self, X_train, y_train, X_validation, y_validation, X_test, y_test, lr, nb_epochs, minibatch_size): # neural network with L hidden layers print('_' * 50) print('Train on %d samples, validate on %d samples' % (len(X_train), len(X_validation))) L = self.no_of_hidden_layers best_teta = None best_accuracy = 0 losses_train = [] losses_val = [] accuracies_train = [] accuracies_val = [] for epoch in range(nb_epochs): loss = 0 accuracy = 0 print("Epoch : " + str(epoch + 1) + " / " + str(nb_epochs)) for i in range(0, X_train.shape[0], minibatch_size): # Python slicing clamps at the end of the array, so the last, # possibly shorter minibatch is handled automatically X_train_mini = X_train[i:i + minibatch_size] y_train_mini = y_train[i:i + minibatch_size] self.progress(min(i + minibatch_size, len(X_train)), len(X_train)) # forward pass for each example # for each layer node_in is the list of nodes where node_in = W x + b node_in = [None] * (L + 2) # for each layer node_a is the list of nodes # where node_a = f(node_in), f is the activation function node_a = [None] * (L + 2) # for each layer node_delta is the list of nodes # where node_delta = f'(node_in), f' is the derivative of activation function node_delta = [None] * (L + 2) node_a[0] = X_train_mini # the minibatch inputs are the layer-0 activations for l in range(1, L + 2): TetaT = np.fastCopyAndTranspose(self.Teta[l - 1]) node_in[l] = node_a[l - 1] @ TetaT # in_i <- sum_j{Teta_j_i * a_j} if l < L + 1: # the hidden layers node_a[l] = relu(node_in[l]) # add the bias b to use it in the next layer node_a[l] = np.concatenate((node_a[l], np.ones((node_a[l].shape[0], 1))), axis=1) else: # the output layer node_a[l] = softmax(node_in[l]) # backpropagation y = y_train_mini node_delta[L + 1] = softmax_backward(y, node_a[L + 1]) for l in range(L, 0, -1): # propagate delta through the weights, excluding the bias column node_delta[l] = relu_backward(node_a[l][:, :-1]) * (node_delta[l + 1] @ self.Teta[l][:, :-1]) # update parameters (gradient-ascent step on the log-likelihood) for l in range(0, L + 1): self.Teta[l] = self.Teta[l] + lr * (node_delta[l + 1].T @ node_a[l]) # compute the loss on the train set y_train_pred = self.predict(X_train) loss = get_loss(y_train, y_train_pred) losses_train.append(loss) # compute the loss on the validation set y_validation_pred = self.predict(X_validation) loss_val = get_loss(y_validation, y_validation_pred) losses_val.append(loss_val) # compute the accuracy on the training set accuracy = get_accuracy(y_train_pred, y_train) accuracies_train.append(accuracy) # compute the accuracy on the validation set accuracy_val = get_accuracy(y_validation_pred, y_validation) accuracies_val.append(accuracy_val) print(' - loss: %.4f - acc: %.4f - val_loss: %.4f - val_acc: %.4f' % (loss, accuracy, loss_val, accuracy_val)) if accuracy_val > best_accuracy: # select the best parameters based on the validation accuracy; # copy the weights, since a plain reference would keep tracking # later in-place updates best_accuracy = accuracy_val best_teta = [teta.copy() for teta in self.Teta] # set Teta to the best weights to test the model on unseen data self.Teta = best_teta y_test_pred = self.predict(X_test) # compute the accuracy on the test set accuracy_on_unseen_data = get_accuracy(y_test_pred, y_test) print("Best Accuracy on validation data: %.4f" % (best_accuracy)) print("Accuracy on test data: %.4f" % (accuracy_on_unseen_data)) return losses_train, losses_val, accuracies_train, accuracies_val, best_teta, best_accuracy
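The helpers relu, relu_backward, softmax, softmax_backward, get_loss and get_accuracy are not shown in this snippet; the following minimal definitions are assumptions consistent with how train() calls them (e.g. the "+ lr * ..." update implies softmax_backward returns y - a, a gradient-ascent direction on the log-likelihood), not the original implementations:

import numpy as np

def relu(x):
    return np.maximum(0.0, x)

def relu_backward(a):
    # relu'(in) expressed through the activation a = relu(in)
    return (a > 0).astype(a.dtype)

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))  # numerically stable
    return e / e.sum(axis=1, keepdims=True)

def softmax_backward(y, a):
    # gradient of the log-likelihood w.r.t. the pre-softmax input
    return y - a

def get_loss(y_true, y_pred):
    # mean cross-entropy for one-hot targets
    return float(-(y_true * np.log(y_pred + 1e-12)).sum(axis=1).mean())

def get_accuracy(y_pred, y_true):
    return float((y_pred.argmax(axis=1) == y_true.argmax(axis=1)).mean())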
def information_transfer(self, tau_max=0, estimator='knn', knn=10, past=1, cond_mode='ity', lag_mode='max'): r""" Return bivariate information transfer between all pairs of nodes. Two condition modes of information transfer are available as described in [Runge2012b]_. Information transfer to Y (ITY): .. math:: I(X^i_{t-\tau}, X^j_t | X^j_{t-1}, \ldots, X^j_{t-past}) Momentary information transfer (MIT): .. math:: I(X^i_{t-\tau}, X^j_t | X^j_{t-1}, \ldots, X^j_{t-past}, X^i_{t-\tau-1}, \ldots, X^i_{t-\tau-past}) Two estimators are available: estimator = 'knn' (Recommended): Based on k-nearest-neighbors [Kraskov2004]_, version 1 in their paper. Larger k have smaller variance, but larger (typically negative) bias, and vice versa. estimator = 'gauss': Captures only the linear part of association. Essentially estimates a transformed partial correlation. Two lag-modes are available (default: lag_mode='max'): lag_mode = 'all': Return 3-dimensional array of lag-functions between all pairs of nodes. An entry :math:`(i, j, \tau)` corresponds to :math:`I(X^i_{t-\tau}, X^j_t | ...)` for positive lags tau, i.e., the direction i --> j for :math:`\tau \ne 0`. lag_mode = 'max': Return matrix of absolute maxima and corresponding lags of lag-functions between all pairs of nodes. Returns two usually asymmetric matrices of values and lags: In each matrix, an entry :math:`(i, j)` corresponds to the value and lag, respectively, at the absolute maximum of :math:`I(X^i_{t-\tau}, X^j_t | ...)` for positive lags tau, i.e., the direction i --> j for :math:`\tau > 0`. The matrices are, thus, asymmetric. The function :meth:`.symmetrize_by_absmax` can be used to obtain a symmetric matrix. **Example:** >>> coup_ana = CouplingAnalysis(CouplingAnalysis.test_data()) >>> similarity_matrix, lag_matrix = coup_ana.information_transfer( ... tau_max=5, estimator='knn', knn=10) >>> r((similarity_matrix, lag_matrix)) (array([[ 0. , 0.1544, 0.3261, 0.3047], [ 0.0218, 0. , 0.0394, 0.0976], [ 0.0134, 0.0663, 0. , 0.1502], [ 0.0066, 0.0694, 0.0401, 0. ]]), array([[0, 2, 1, 2], [5, 0, 0, 0], [5, 1, 0, 1], [5, 0, 0, 0]])) :type tau_max: int [int>=0] :arg tau_max: maximum lag of ITY lag function. :type past: int [int>=1] :arg past: maximum lag of past history. :type knn: int [int>=1] :arg knn: nearest-neighbor ITY estimation parameter. (default: 10) :type estimator: str [('knn'|'gauss')] :arg estimator: ITY estimator. (default: 'knn') :type cond_mode: str [('ity'|'mit')] :arg cond_mode: condition mode. (default: 'ity') :type lag_mode: str [('max'|'all')] :arg lag_mode: lag-mode of ITY to return. :rtype: 3D-array or tuple of matrices :returns: all-lag array or matrices of value and lag at the absolute maximum. """ data = self.data T, N = data.shape # Sanity checks if not isinstance(data, numpy.ndarray): raise TypeError("data is of type %s, must be numpy.ndarray" % type(data)) if N > T: print(f"Warning: data.shape = {data.shape}," " is it of shape (observations, variables) ?") if estimator == 'knn' and T < 500: print(f"Warning: T = {T}," " unreliable estimation using knn-estimator") if numpy.isnan(data).sum() != 0: raise ValueError("NaNs in the data") if tau_max < 0: raise ValueError("tau_max = %d, but 0 <= tau_max" % tau_max) if estimator == 'knn': if knn > T / 2. or knn < 1: raise ValueError(f"knn = {knn}, should be between 1 and T/2") if lag_mode == 'max': similarity_matrix = numpy.ones((N, N), dtype='float32') lag_matrix = numpy.zeros((N, N), dtype='int8') elif lag_mode == 'all': lagfuncs = numpy.zeros((N, N, tau_max + 1), dtype='float32') for i in range(N): for j in range(N): maximum = 0. lag_at_max = 0 for tau in range(tau_max + 1): X = [(i, -tau)] Y = [(j, 0)] if cond_mode == 'ity': Z = [(j, -p) for p in range(1, past + 1)] elif cond_mode == 'mit': Z = [(j, -p) for p in range(1, past + 1)] Z += [(i, -tau - p) for p in range(1, past + 1)] XYZ = X + Y + Z dim = len(XYZ) max_lag = tau_max + past array = numpy.zeros((dim, T - max_lag)) for d, node in enumerate(XYZ): var, lag = node array[d, :] = data[max_lag + lag:T + lag, var] if estimator == 'knn': xyz = numpy.array([0, 1]) k_xz, k_yz, k_z = self._get_nearest_neighbors( array=array, xyz=xyz, k=knn, standardize=True) ixy_z = ( special.digamma(knn) + (-special.digamma(k_xz) - special.digamma(k_yz) + special.digamma(k_z)).mean()) elif estimator == 'gauss': if numpy.isnan(array).sum() != 0: raise ValueError("nans in the array!") # Standardize array -= array.mean(axis=1).reshape(dim, 1) array /= array.std(axis=1).reshape(dim, 1) if numpy.isnan(array).sum() != 0: raise ValueError("nans after standardizing, " "possibly constant array!") x = array[0, :] y = array[1, :] if len(array) > 2: confounds = array[2:, :] ortho_confounds = linalg.qr( numpy.fastCopyAndTranspose(confounds), mode='economic')[0].T x -= numpy.dot(numpy.dot(ortho_confounds, x), ortho_confounds) y -= numpy.dot(numpy.dot(ortho_confounds, y), ortho_confounds) ixy_z = self._par_corr_to_cmi( numpy.dot(x, y) / numpy.sqrt(numpy.dot(x, x) * numpy.dot(y, y))) if lag_mode == 'max': if ixy_z > maximum: maximum = ixy_z lag_at_max = tau elif lag_mode == 'all': lagfuncs[i, j, tau] = ixy_z if lag_mode == 'max': similarity_matrix[i, j] = maximum lag_matrix[i, j] = lag_at_max if lag_mode == 'max': similarity_matrix[range(N), range(N)] = 0. elif lag_mode == 'all': lagfuncs[range(N), range(N), 0] = 0. if lag_mode == 'max': return similarity_matrix, lag_matrix elif lag_mode == 'all': return lagfuncs else: return None
def _weave_calculate_mutual_information(self, anomaly, n_bins=32, fast=True): """ Calculate the mutual information matrix at zero lag. The weave code is adapted from the Tisean 3.0.1 mutual.c module. :type anomaly: 2D Numpy array (time, index) :arg anomaly: The anomaly time series. :arg int n_bins: The number of bins for estimating probability distributions. :arg bool fast: Indicates whether the fast or the slow algorithm should be used. :rtype: 2D array (index, index) :return: the mutual information matrix at zero lag. """ if self.silence_level <= 1: print("Calculating mutual information matrix at zero lag from " "anomaly values using Weave...") # Normalize anomaly time series to zero mean and unit variance self.data.normalize_time_series_array(anomaly) # Create local transposed copy of anomaly anomaly = np.fastCopyAndTranspose(anomaly) (N, n_samples) = anomaly.shape # Get common range for all histograms range_min = float(anomaly.min()) range_max = float(anomaly.max()) # Rescale all time series to the interval [0,1], # using the maximum range of the whole dataset. scaling = float(1. / (range_max - range_min)) # Create array to hold symbolic trajectories symbolic = np.empty(anomaly.shape, dtype=LONG_TYPE) # Initialize array to hold 1d-histograms of individual time series hist = np.zeros((N, n_bins), dtype=LONG_TYPE) # Initialize array to hold 2d-histogram for one pair of time series hist2d = np.zeros((n_bins, n_bins), dtype=LONG_TYPE) # Initialize mutual information array mi = np.zeros((N, N), dtype=DOUBLE_TYPE) code = r""" int i, j, k, l, m; int symbol, symbol_i, symbol_j; double norm, rescaled, hpl, hpm, plm; // Calculate histogram norm norm = 1.0 / n_samples; for (i = 0; i < N; i++) { for (k = 0; k < n_samples; k++) { // Calculate symbolic trajectories for each time series, // where the symbols are bins. rescaled = scaling * (anomaly(i,k) - range_min); if (rescaled < 1.0) { symbolic(i,k) = rescaled * n_bins; } else { symbolic(i,k) = n_bins - 1; } // Calculate 1d-histograms for single time series symbol = symbolic(i,k); hist(i,symbol) += 1; } } for (i = 0; i < N; i++) { for (j = 0; j <= i; j++) { // The case i = j is not of interest here! if (i != j) { // Calculate 2d-histogram for one pair of time series // (i,j). for (k = 0; k < n_samples; k++) { symbol_i = symbolic(i,k); symbol_j = symbolic(j,k); hist2d(symbol_i,symbol_j) += 1; } // Calculate mutual information for one pair of time // series (i,j). for (l = 0; l < n_bins; l++) { hpl = hist(i,l) * norm; if (hpl > 0.0) { for (m = 0; m < n_bins; m++) { hpm = hist(j,m) * norm; if (hpm > 0.0) { plm = hist2d(l,m) * norm; if (plm > 0.0) { mi(i,j) += plm * log(plm/hpm/hpl); } } } } } // Symmetrize MI mi(j,i) = mi(i,j); // Reset hist2d to zero in all bins for (l = 0; l < n_bins; l++) { for (m = 0; m < n_bins; m++) { hist2d(l,m) = 0; } } } } } """ # anomaly must be a contiguous Numpy array for this code to work # correctly! All the other arrays are generated from scratch in this # method and are guaranteed to be contiguous by Numpy.
fastCode = r""" long i, j, k, l, m, in_bins, jn_bins, ln_bins, in_samples, jn_samples, in_nodes; double norm, rescaled, hpl, hpm, plm; double *p_anomaly; float *p_mi, *p_mi2; long *p_symbolic, *p_symbolic1, *p_symbolic2, *p_hist, *p_hist1, *p_hist2, *p_hist2d; // Calculate histogram norm norm = 1.0 / n_samples; // Initialize in_samples, in_bins in_samples = in_bins = 0; for (i = 0; i < N; i++) { // Set pointer to anomaly(i,0) p_anomaly = anomaly + in_samples; // Set pointer to symbolic(i,0) p_symbolic = symbolic + in_samples; for (k = 0; k < n_samples; k++) { // Rescale sample into interval [0,1] rescaled = scaling * (*p_anomaly - range_min); // Calculate symbolic trajectories for each time series, // where the symbols are bin numbers. if (rescaled < 1.0) { *p_symbolic = rescaled * n_bins; } else { *p_symbolic = n_bins - 1; } // Calculate 1d-histograms for single time series // Set pointer to hist(i, *p_symbolic) p_hist = hist + in_bins + *p_symbolic; (*p_hist)++; // Set pointer to anomaly(i,k+1) p_anomaly++; // Set pointer to symbolic(i,k+1) p_symbolic++; } in_samples += n_samples; in_bins += n_bins; } // Initialize in_samples, in_bins, in_nodes in_samples = in_bins = in_nodes = 0; for (i = 0; i < N; i++) { // Set pointer to mi(i,0) p_mi = mi + in_nodes; // Set pointer to mi(0,i) p_mi2 = mi + i; // Initialize jn_samples, jn_bins jn_samples = jn_bins = 0; for (j = 0; j <= i; j++) { // Don't do anything for i = j, this case is not of // interest here! if (i != j) { // Set pointer to symbolic(i,0) p_symbolic1 = symbolic + in_samples; // Set pointer to symbolic(j,0) p_symbolic2 = symbolic + jn_samples; // Calculate 2d-histogram for one pair of time series // (i,j). for (k = 0; k < n_samples; k++) { // Set pointer to hist2d(*p_symbolic1, *p_symbolic2) p_hist2d = hist2d + (*p_symbolic1)*n_bins + *p_symbolic2; (*p_hist2d)++; // Set pointer to symbolic(i,k+1) p_symbolic1++; // Set pointer to symbolic(j,k+1) p_symbolic2++; } // Calculate mutual information for one pair of time // series (i,j). // Set pointer to hist(i,0) p_hist1 = hist + in_bins; // Initialize ln_bins ln_bins = 0; for (l = 0; l < n_bins; l++) { // Set pointer to hist(j,0) p_hist2 = hist + jn_bins; // Set pointer to hist2d(l,0) p_hist2d = hist2d + ln_bins; hpl = (*p_hist1) * norm; if (hpl > 0.0) { for (m = 0; m < n_bins; m++) { hpm = (*p_hist2) * norm; if (hpm > 0.0) { plm = (*p_hist2d) * norm; if (plm > 0.0) { *p_mi += plm * log(plm/hpm/hpl); } } // Set pointer to hist(j,m+1) p_hist2++; // Set pointer to hist2d(l,m+1) p_hist2d++; } } // Set pointer to hist(i,l+1) p_hist1++; ln_bins += n_bins; } // Symmetrize MI *p_mi2 = *p_mi; // Initialize ln_bins ln_bins = 0; // Reset hist2d to zero in all bins for (l = 0; l < n_bins; l++) { // Set pointer to hist2d(l,0) p_hist2d = hist2d + ln_bins; for (m = 0; m < n_bins; m++) { *p_hist2d = 0; // Set pointer to hist2d(l,m+1) p_hist2d++; } ln_bins += n_bins; } } // Set pointer to mi(i,j+1) p_mi++; // Set pointer to mi(j+1,i) p_mi2 += N; jn_samples += n_samples; jn_bins += n_bins; } in_samples += n_samples; in_bins += n_bins; in_nodes += N; } """ args = ['anomaly', 'n_samples', 'N', 'n_bins', 'scaling', 'range_min', 'symbolic', 'hist', 'hist2d', 'mi'] if fast: weave_inline(locals(), fastCode, args, blitz=False) else: weave_inline(locals(), code, args) if self.silence_level <= 1: print("Done!") return mi
def _get_single_residuals(self, array, target_var, return_means=False, standardize=True, return_likelihood=False): """Returns residuals of Gaussian process regression. Performs a GP regression of the variable indexed by target_var on the conditions Z. Here array is assumed to contain X and Y as the first two rows with the remaining rows (if present) containing the conditions Z. Optionally returns the estimated mean and the likelihood. Parameters ---------- array : array-like data array with X, Y, Z in rows and observations in columns target_var : {0, 1} Variable to regress out conditions from. standardize : bool, optional (default: True) Whether to standardize the array beforehand. return_means : bool, optional (default: False) Whether to return the estimated regression line. return_likelihood : bool, optional (default: False) Whether to return the log_marginal_likelihood of the fitted GP Returns ------- resid [, mean, likelihood] : array-like The residual of the regression and optionally the estimated mean and/or the likelihood. """ dim, T = array.shape if self.gp_params is None: self.gp_params = {} if dim <= 2: if return_likelihood: return array[target_var, :], -np.inf return array[target_var, :] # Standardize if standardize: array -= array.mean(axis=1).reshape(dim, 1) array /= array.std(axis=1).reshape(dim, 1) if np.isnan(array).sum() != 0: raise ValueError("nans after standardizing, " "possibly constant array!") target_series = array[target_var, :] z = np.fastCopyAndTranspose(array[2:]) if np.ndim(z) == 1: z = z.reshape(-1, 1) # Overwrite default kernel and alpha values params = self.gp_params.copy() if 'kernel' not in list(self.gp_params): kernel = gaussian_process.kernels.RBF() +\ gaussian_process.kernels.WhiteKernel() else: kernel = self.gp_params['kernel'] del params['kernel'] if 'alpha' not in list(self.gp_params): alpha = 0. else: alpha = self.gp_params['alpha'] del params['alpha'] gp = gaussian_process.GaussianProcessRegressor(kernel=kernel, alpha=alpha, **params) gp.fit(z, target_series.reshape(-1, 1)) if self.verbosity > 3: print(kernel, alpha, gp.kernel_, gp.alpha) if return_likelihood: likelihood = gp.log_marginal_likelihood() mean = gp.predict(z).squeeze() resid = target_series - mean if return_means and not return_likelihood: return (resid, mean) elif return_likelihood and not return_means: return (resid, likelihood) elif return_means and return_likelihood: return resid, mean, likelihood return resid
def _get_single_residuals(self, array, target_var, return_means=False, standardize=True, return_likelihood=False): """Returns residuals of Gaussian process regression. Performs a GP regression of the variable indexed by target_var on the conditions Z. Here array is assumed to contain X and Y as the first two rows with the remaining rows (if present) containing the conditions Z. Optionally returns the estimated mean and the likelihood. Parameters ---------- array : array-like data array with X, Y, Z in rows and observations in columns target_var : {0, 1} Variable to regress out conditions from. standardize : bool, optional (default: True) Whether to standardize the array beforehand. return_means : bool, optional (default: False) Whether to return the estimated regression line. return_likelihood : bool, optional (default: False) Whether to return the log_marginal_likelihood of the fitted GP. Returns ------- resid [, mean, likelihood] : array-like The residual of the regression and optionally the estimated mean and/or the likelihood. """ dim, T = array.shape if self.gp_params is None: self.gp_params = {} if dim <= 2: if return_likelihood: return array[target_var, :], -np.inf return array[target_var, :] # Standardize if standardize: array -= array.mean(axis=1).reshape(dim, 1) array /= array.std(axis=1).reshape(dim, 1) if np.isnan(array).sum() != 0: raise ValueError("nans after standardizing, " "possibly constant array!") target_series = array[target_var, :] z = np.fastCopyAndTranspose(array[2:]) if np.ndim(z) == 1: z = z.reshape(-1, 1) if self.gp_version == 'old': # The old GP implementation failed for ties in the data def remove_ties(series): # Test whether ties exist and add noise to destroy ties... cnt = 0 while len(np.unique(series)) < np.size(series): series += 1E-6 * np.random.rand(*series.shape) cnt += 1 if cnt > 100: break return series z = remove_ties(z) target_series = remove_ties(target_series) gp = gaussian_process.GaussianProcess(nugget=1E-1, thetaL=1E-16, thetaU=np.inf, corr='squared_exponential', optimizer='fmin_cobyla', regr='constant', normalize=False, storage_mode='light') elif self.gp_version == 'new': # Overwrite default kernel and alpha values params = self.gp_params.copy() if 'kernel' not in list(self.gp_params): kernel = gaussian_process.kernels.RBF() +\ gaussian_process.kernels.WhiteKernel() else: kernel = self.gp_params['kernel'] del params['kernel'] if 'alpha' not in list(self.gp_params): alpha = 0. else: alpha = self.gp_params['alpha'] del params['alpha'] gp = gaussian_process.GaussianProcessRegressor(kernel=kernel, alpha=alpha, **params) gp.fit(z, target_series.reshape(-1, 1)) if self.verbosity > 3 and self.gp_version == 'new': print(kernel, alpha, gp.kernel_, gp.alpha) if self.verbosity > 3 and self.gp_version == 'old': print(gp.get_params()) if return_likelihood: likelihood = gp.log_marginal_likelihood() mean = gp.predict(z).squeeze() resid = target_series - mean if return_means and not return_likelihood: return (resid, mean) elif return_likelihood and not return_means: return (resid, likelihood) elif return_means and return_likelihood: return resid, mean, likelihood return resid
def get_conditional_entropy(self, array, xyz): """Returns the nearest-neighbor conditional entropy estimate of H(X|Y). Parameters ---------- array : array-like data array with X, Y in rows and observations in columns xyz : array of ints XYZ identifier array of shape (dim,). Here only the entries 0 (X) and 1 (Y) are used. Returns ------- val : float Entropy estimate. """ dim, T = array.shape if self.knn < 1: knn_here = max(1, int(self.knn * T)) else: knn_here = max(1, int(self.knn)) array = array.astype('float') # Add noise to destroy ties... array += (1E-6 * array.std(axis=1).reshape(dim, 1) * np.random.rand(array.shape[0], array.shape[1])) if self.transform == 'standardize': # Standardize array = array.astype('float') array -= array.mean(axis=1).reshape(dim, 1) array /= array.std(axis=1).reshape(dim, 1) # FIXME: If the time series is constant, return nan rather than # raising Exception if np.isnan(array).sum() != 0: raise ValueError("nans after standardizing, " "possibly constant array!") elif self.transform == 'uniform': array = self._trafo2uniform(array) elif self.transform == 'ranks': array = array.argsort(axis=1).argsort(axis=1).astype('float') # Compute conditional entropy as H(X|Y) = H(X) - I(X;Y) # First compute H(X): use cKDTree to get distances eps to the k-th # nearest neighbor of every sample in X with maximum norm x_indices = np.where(xyz == 0)[0] y_indices = np.where(xyz == 1)[0] dim_x = int(x_indices[-1] + 1) if 1 in xyz: dim_y = int(y_indices[-1] + 1 - dim_x) else: dim_y = 0 x_array = np.fastCopyAndTranspose(array[x_indices, :]) tree_xyz = spatial.cKDTree(x_array) epsarray = tree_xyz.query( x_array, k=knn_here + 1, p=np.inf, eps=0., n_jobs=self.n_jobs)[0][:, knn_here].astype('float') h_x = -special.digamma(knn_here) + special.digamma(T) + dim_x * np.log( 2. * epsarray).mean() # Then compute MI(X;Y) if dim_y > 0: xyz_here = np.array( [index for index in xyz if index == 0 or index == 1]) array_xy = array[list(x_indices) + list(y_indices), :] i_xy = self.get_dependence_measure(array_xy, xyz_here) else: i_xy = 0. h_x_y = h_x - i_xy return h_x_y
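The H(X) term above is the Kozachenko-Leonenko nearest-neighbor entropy estimate. A standalone sketch against a known answer (the analytic entropy of a standard normal), with assumed toy parameters:

import numpy as np
from scipy import spatial, special

rng = np.random.default_rng(0)
T, k = 5000, 10
x = rng.standard_normal((T, 1))

# Distance to the k-th nearest neighbor in maximum norm (self excluded)
tree = spatial.cKDTree(x)
eps = tree.query(x, k=k + 1, p=np.inf)[0][:, k]

# H(X) ~ -psi(k) + psi(T) + d * mean(log(2 * eps)), with d = 1 here
h_est = -special.digamma(k) + special.digamma(T) + np.log(2.0 * eps).mean()
h_true = 0.5 * np.log(2.0 * np.pi * np.e)   # entropy of N(0, 1) in nats
print(h_est, h_true)                        # close for large T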
    def cross_correlation(self, tau_max=0, lag_mode='max'):
        r"""
        Return cross correlation between all pairs of nodes.

        Two lag-modes are available (default: lag_mode='max'):

        lag_mode = 'all':
        Return 3-dimensional array of lagged cross correlations between all
        pairs of nodes. An entry :math:`(i, j, \tau)` corresponds to
        :math:`\rho(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau \ne 0`.

        lag_mode = 'max':
        Return matrix of absolute maxima and corresponding lags of lagged
        cross correlation (CC) between all pairs of nodes.
        Returns two usually asymmetric matrices of CC values and lags: In
        each matrix, an entry :math:`(i, j)` corresponds to the (positive or
        negative) value and lag, respectively, at the absolute maximum of
        :math:`\rho(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau > 0`. The matrices are, thus,
        asymmetric. The function :meth:`.symmetrize_by_absmax` can be used
        to obtain a symmetric matrix.

        **Example:**

        >>> coup_ana = CouplingAnalysis(CouplingAnalysis.test_data())
        >>> similarity_matrix, lag_matrix = coup_ana.cross_correlation(
        ...     tau_max=5, lag_mode='max')
        >>> r((similarity_matrix, lag_matrix))
        (array([[ 1.    ,  0.757 ,  0.779 ,  0.7536],
                [ 0.4847,  1.    ,  0.4502,  0.5197],
                [ 0.6219,  0.5844,  1.    ,  0.5992],
                [ 0.4827,  0.5509,  0.4996,  1.    ]]),
         array([[0, 4, 1, 2], [0, 0, 0, 0],
                [0, 3, 0, 1], [0, 2, 0, 0]]))

        :type tau_max: int [int>=0]
        :arg tau_max: maximum lag of cross correlation lag function.

        :type lag_mode: str [('max'|'all')]
        :arg lag_mode: lag-mode of cross correlations to return.

        :rtype: 3D-array or tuple of matrices
        :returns: all-lag array or matrices of value and lag at the absolute
                  maximum.
        """
        data = self.data
        T, N = data.shape

        # Sanity checks
        if not isinstance(data, numpy.ndarray):
            raise TypeError(f"data is of type {type(data)}, "
                            "must be numpy.ndarray")
        if N > T:
            print(f"Warning: data.shape = {data.shape},"
                  " is it of shape (observations, variables)?")
        if numpy.isnan(data).sum() != 0:
            raise ValueError("NaNs in the data")
        if tau_max < 0:
            raise ValueError(f"tau_max = {tau_max}, "
                             "but tau_max must be >= 0")
        if lag_mode not in ['max', 'all']:
            raise ValueError(f"lag_mode = {lag_mode}, "
                             "but must be one of 'max', 'all'")

        # Normalize time series to zero mean and unit variance for all lags
        corr_range = T - tau_max
        array = numpy.empty((tau_max + 1, N, corr_range), dtype="float32")

        for t in range(tau_max + 1):
            # Remove mean value from time series at each node
            array[t] = numpy.fastCopyAndTranspose(
                data[t:t + corr_range, :]
                - data[t:t + corr_range, :].mean(axis=0).reshape(1, N))

            # Normalize the variance of anomalies to one
            array[t] /= array[t].std(axis=1).reshape(N, 1)

            # Correct for nodes with zero variance in their time series
            array[t][numpy.isnan(array[t])] = 0

        if lag_mode == 'max':
            return _cross_correlation_max(array.copy(order='c'),
                                          N, tau_max, corr_range)
        elif lag_mode == 'all':
            return _cross_correlation_all(array.copy(order='c'),
                                          N, tau_max, corr_range)
        else:
            return None
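# A short usage sketch for the two lag modes, assuming the four-variable
# test_data() example from the docstring above.  The helper below is
# purely illustrative and not part of the class; its placement at module
# level is an assumption.
def _cross_correlation_usage_sketch():
    coup_ana = CouplingAnalysis(CouplingAnalysis.test_data())
    # 'all' returns the full lag function, shape (N, N, tau_max + 1)
    lagfuncs = coup_ana.cross_correlation(tau_max=5, lag_mode='all')
    # 'max' returns two (N, N) matrices: the CC value and the lag at the
    # absolute maximum over lags 0..tau_max
    similarity_matrix, lag_matrix = coup_ana.cross_correlation(
        tau_max=5, lag_mode='max')
    return lagfuncs, similarity_matrix, lag_matrix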
    def cross_correlation(self, tau_max=0, lag_mode='max'):
        r"""
        Return cross correlation between all pairs of nodes.

        Two lag-modes are available (default: lag_mode='max'):

        lag_mode = 'all':
        Return 3-dimensional array of lagged cross correlations between all
        pairs of nodes. An entry :math:`(i, j, \tau)` corresponds to
        :math:`\rho(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau \ne 0`.

        lag_mode = 'max':
        Return matrix of absolute maxima and corresponding lags of lagged
        cross correlation (CC) between all pairs of nodes.
        Returns two usually asymmetric matrices of CC values and lags: In
        each matrix, an entry :math:`(i, j)` corresponds to the (positive or
        negative) value and lag, respectively, at the absolute maximum of
        :math:`\rho(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau > 0`. The matrices are, thus,
        asymmetric. The function :meth:`.symmetrize_by_absmax` can be used
        to obtain a symmetric matrix.

        **Example:**

        >>> coup_ana = CouplingAnalysis(CouplingAnalysis.test_data())
        >>> similarity_matrix, lag_matrix = coup_ana.cross_correlation(
        ...     tau_max=5, lag_mode='max')
        >>> r((similarity_matrix, lag_matrix))
        (array([[ 1.    ,  0.757 ,  0.779 ,  0.7536],
                [ 0.4847,  1.    ,  0.4502,  0.5197],
                [ 0.6219,  0.5844,  1.    ,  0.5992],
                [ 0.4827,  0.5509,  0.4996,  1.    ]]),
         array([[0, 4, 1, 2], [0, 0, 0, 0],
                [0, 3, 0, 1], [0, 2, 0, 0]]))

        :type tau_max: int [int>=0]
        :arg tau_max: maximum lag of cross correlation lag function.

        :type lag_mode: str [('max'|'all')]
        :arg lag_mode: lag-mode of cross correlations to return.

        :rtype: 3D-array or tuple of matrices
        :returns: all-lag array or matrices of value and lag at the absolute
                  maximum.
        """
        data = self.data
        T, N = data.shape

        # Sanity checks
        if not isinstance(data, numpy.ndarray):
            raise TypeError("data is of type %s, "
                            "must be numpy.ndarray" % type(data))
        if N > T:
            print("Warning: data.shape = %s,"
                  " is it of shape (observations, variables)?"
                  % str(data.shape))
        if numpy.isnan(data).sum() != 0:
            raise ValueError("NaNs in the data")
        if tau_max < 0:
            raise ValueError("tau_max = %d, but tau_max must be >= 0"
                             % tau_max)
        if lag_mode not in ['max', 'all']:
            raise ValueError("lag_mode = %s, but must be one of "
                             "'max', 'all'" % lag_mode)

        # Normalize time series to zero mean and unit variance for all lags
        corr_range = T - tau_max
        array = numpy.empty((tau_max + 1, N, corr_range), dtype="float32")

        for t in range(tau_max + 1):
            # Remove mean value from time series at each node
            array[t] = numpy.fastCopyAndTranspose(
                data[t:t+corr_range, :]
                - data[t:t+corr_range, :].mean(axis=0).reshape(1, N))

            # Normalize the variance of anomalies to one
            array[t] /= array[t].std(axis=1).reshape(N, 1)

            # Correct for nodes with zero variance in their time series
            array[t][numpy.isnan(array[t])] = 0

        if lag_mode == 'max':
            similarity_matrix = numpy.ones((self.N, self.N),
                                           dtype='float32')
            lag_matrix = numpy.zeros((self.N, self.N), dtype='int8')

            code = r"""
            int i, j, tau, k, argmax;
            double crossij, max;

            // loop over all node pairs, NOT symmetric due to time shifts!
for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { if( i != j){ max = 0.0; argmax = 0; // loop over taus INCLUDING the last tau value for( tau = 0; tau < tau_max + 1; tau++) { crossij = 0; // here the actual cross correlation is calculated // assuming standardized arrays for ( k = 0; k < corr_range; k++) { crossij += array(tau, i, k) * array(tau_max, j, k); } // calculate max and argmax by comparing to // previous value and storing max if (fabs(crossij) > fabs(max)) { max = crossij; argmax = tau; } } similarity_matrix(i,j) = max/(float)(corr_range); lag_matrix(i,j) = tau_max - argmax; } } } """ weave_inline(locals(), code, ['array', 'similarity_matrix', 'lag_matrix', 'N', 'tau_max', 'corr_range']) return similarity_matrix, lag_matrix elif lag_mode == 'all': lagfuncs = numpy.zeros((self.N, self.N, tau_max+1), dtype='float32') code = r""" int i,j,tau,k, argmax; double crossij, max; // loop over all node pairs, NOT symmetric due to time shifts! for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { // loop over taus INCLUDING the last tau value for( tau = 0; tau < tau_max + 1; tau++) { crossij = 0; // here the actual cross correlation is calculated // assuming standardized arrays for ( k = 0; k < corr_range; k++) { crossij += array(tau, i, k) * array(tau_max, j, k); } lagfuncs(i,j,tau_max-tau) = crossij/(float)(corr_range); } } } """ weave_inline(locals(), code, ['array', 'lagfuncs', 'N', 'tau_max', 'corr_range']) return lagfuncs