def fit(self, X, y=None):
    """Fit the detector on ``X``; ``y`` is optional for unsupervised use.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : numpy array of shape (n_samples,), optional (default=None)
        The ground truth of the input samples (labels). It is handed to
        ``_set_n_classes`` and forwarded to the underlying estimator.

    Returns
    -------
    self : object
        This detector, after fitting.
    """
    # Validate the samples before anything else touches them.
    X = check_array(X)
    self._set_n_classes(y)

    mcd_kwargs = dict(
        store_precision=self.store_precision,
        assume_centered=self.assume_centered,
        support_fraction=self.support_fraction,
        random_state=self.random_state,
    )
    self.detector_ = MinCovDet(**mcd_kwargs)
    self.detector_.fit(X=X, y=y)

    # The estimator's training distances (``dist_``) serve as outlier scores.
    self.decision_scores_ = self.detector_.dist_
    self._process_decision_scores()
    return self
def robust_mahalanobis_method(x=None, data=None):
    """Flag rows of ``x`` whose squared robust Mahalanobis distance is extreme.

    A Minimum Covariance Determinant (MCD) estimator supplies the location
    and covariance; each row of ``x`` is scored with the squared Mahalanobis
    distance to that location and compared with the 99.9% chi-square
    quantile (degrees of freedom = number of columns of ``x``).

    Parameters
    ----------
    x : 2-D array-like (must expose ``.shape``)
        Rows to score.
    data : object exposing ``.values`` (e.g. a pandas DataFrame)
        Reference data used to parameterise the estimator.

    Returns
    -------
    outlier : list of int
        Positional indices of rows whose squared distance exceeds the cutoff.
    md : numpy.ndarray
        Squared distance of every row of ``x``.
    """
    # Minimum covariance determinant method.
    # NOTE(review): the MCD is fitted to a 506-row Gaussian sample simulated
    # from the empirical mean/covariance of `data`, not to `data` itself.
    # Behavior is kept as-is; confirm this is intended.
    rng = np.random.RandomState(0)
    real_cov = np.cov(data.values.T)
    X = rng.multivariate_normal(mean=np.mean(data, axis=0),
                                cov=real_cov,
                                size=506)
    cov = MinCovDet(random_state=0).fit(X)
    mcd = cov.covariance_  # robust covariance matrix
    robust_mean = cov.location_  # robust mean
    inv_covmat = sp.linalg.inv(mcd)  # inverse covariance matrix

    # Row-wise quadratic form. The previous implementation built the full
    # n x n product only to read its diagonal; this computes the diagonal
    # directly in O(n * p) memory.
    x_minus_mu = np.asarray(x - robust_mean)
    left_term = np.dot(x_minus_mu, inv_covmat)
    md = np.sum(left_term * x_minus_mu, axis=1)

    # Compare with the chi-square cutoff (df = number of variables).
    C = chi2.ppf((1 - 0.001), df=x.shape[1])
    outlier = np.flatnonzero(md > C).tolist()
    return outlier, md
def MCD_Score(train_a, test_a, test_b):
    """Fit an MCD estimator on ``train_a`` and print two score arrays.

    The ``mahalanobis`` output for ``test_a`` is printed under the
    "ano score" label and the output for ``test_b`` under "normal score".
    Nothing is returned.
    """
    estimator = MinCovDet()
    estimator.fit(train_a)

    score_for_a = estimator.mahalanobis(test_a)
    score_for_b = estimator.mahalanobis(test_b)

    message = "mcd ano score {} mcd normal score {}".format(
        score_for_a, score_for_b)
    print(message)
def outliers_finder(data_frame: pd.DataFrame) -> pd.DataFrame:
    """Find and remove multivariate outliers from ``data_frame`` in place.

    The feature part of the frame (as returned by ``splitting_dataset``) is
    standardised and projected with PCA. An MCD estimator is fitted on the
    leading (at most five) score columns, and each row's Mahalanobis value
    is converted to a chi-square tail probability. Rows with ``p <= 0.001``
    are dropped.

    The columns ``'mahalanobis'``, ``'p'`` and ``'Drops'`` are added to the
    frame, and the number of dropped rows is printed.

    :param data_frame: frame to clean; it is modified in place.
    :return: the same frame object, without the dropped rows.
    """
    df_X, _ = splitting_dataset(data_frame)

    # Run PCA on scaled data and obtain the scores array.
    pca = PCA()
    T = pca.fit_transform(StandardScaler().fit_transform(df_X.values))

    # Fit the robust estimator on the leading components only.
    scores = T[:, :5]
    robust_cov = MinCovDet().fit(scores)
    data_frame['mahalanobis'] = robust_cov.mahalanobis(scores)

    # The chi-square degrees of freedom must equal the number of dimensions
    # the distance was computed in (previously hard-coded to 3 while five
    # components were used). `sf` is the survival function, i.e. 1 - cdf,
    # without the cancellation error in the far tail.
    data_frame['p'] = chi2.sf(data_frame['mahalanobis'], scores.shape[1])

    data_frame['Drops'] = (data_frame['p'] <= 0.001)
    indexNames = data_frame[data_frame['Drops']].index
    print(indexNames.size)
    data_frame.drop(indexNames, inplace=True)
    return data_frame
def robust_mahalanobis_method(df):
    """Flag rows of ``df`` whose robust Mahalanobis distance is extreme.

    A Minimum Covariance Determinant (MCD) estimator supplies the location
    and covariance. Each row is scored with the Mahalanobis distance (square
    root of the quadratic form) and compared with the square root of the
    99.9% chi-square quantile (degrees of freedom = number of columns).

    Parameters
    ----------
    df : object exposing ``.values`` and ``.shape`` (e.g. a pandas DataFrame)
        Data to score; also used to parameterise the estimator.

    Returns
    -------
    outlier : list of int
        Positional indices of rows whose distance exceeds the cutoff.
    md : numpy.ndarray
        Distance of every row.
    """
    # Minimum covariance determinant.
    # NOTE(review): the MCD is fitted to a 506-row Gaussian sample simulated
    # from the empirical mean/covariance of `df`, not to `df` itself.
    # Behavior is kept as-is; confirm this is intended.
    rng = np.random.RandomState(0)
    real_cov = np.cov(df.values.T)
    X = rng.multivariate_normal(mean=np.mean(df, axis=0),
                                cov=real_cov,
                                size=506)
    cov = MinCovDet(random_state=0).fit(X)
    mcd = cov.covariance_  # robust covariance matrix
    robust_mean = cov.location_  # robust mean
    inv_covmat = sp.linalg.inv(mcd)  # inverse covariance matrix

    # Robust M-distance, computed row by row. The previous implementation
    # formed the full n x n product only to read its diagonal.
    x_minus_mu = np.asarray(df - robust_mean)
    left_term = np.dot(x_minus_mu, inv_covmat)
    md = np.sqrt(np.sum(left_term * x_minus_mu, axis=1))

    # Cutoff in distance units (df = number of variables).
    C = np.sqrt(chi2.ppf((1 - 0.001), df=df.shape[1]))
    outlier = np.flatnonzero(md > C).tolist()
    return outlier, md
def reject_outliers(self, nstd=10.):
    """Recompute the inlier selection from an MCD fit.

    ``self.qobs`` is concatenated (last axis) with ``self.qpred`` scaled
    row-wise by ``self.wav``; an MCD estimator is fitted on the result and
    every sample whose ``dist_`` value is at most ``nstd**2`` is passed to
    ``self.set_inliers`` as a boolean mask.
    """
    from sklearn.covariance import MinCovDet

    observed = self.qobs
    scaled_prediction = self.qpred * self.wav[:, None]
    features = np.concatenate((observed, scaled_prediction), axis=-1)

    estimator = MinCovDet().fit(features)
    keep_mask = estimator.dist_ <= nstd**2.
    self.set_inliers(keep_mask)
def interaction_matrix(X,
                       interaction_type='causal',
                       prior_knowledge=None,
                       measure='pwling',
                       estimator='ML',
                       file_name=''):
    """Build an interaction matrix between the columns of ``X``.

    Parameters
    ----------
    X : 2-D array of shape (n_samples, n_features)
        Data matrix. Not read when ``interaction_type='precomputed'``.
    interaction_type : {'causal', 'correlation', 'covariance', 'precomputed'}
        Which kind of interaction to compute.
    prior_knowledge, measure
        Forwarded to ``DirectLiNGAM`` (``'causal'`` only).
    estimator : {'ML', 'MCD'}
        Covariance estimator (``'covariance'`` only): ``EmpiricalCovariance``
        or ``MinCovDet``.
    file_name : str
        CSV path read with ``pandas.read_csv`` (``'precomputed'`` only).

    Returns
    -------
    'causal'      : ``(B, C)`` - the fitted adjacency matrix and the sum of
                    its powers ``B**1 + ... + B**(n_features - 1)``.
    'correlation' : correlation matrix with the identity subtracted.
    'covariance'  : ``(cov, U)`` - the covariance estimate and the transposed
                    eigenvectors of its inverse, scaled by the square roots
                    of the matching eigenvalues.
    'precomputed' : the values of the CSV file.
    Any other ``interaction_type`` returns ``None``.

    Raises
    ------
    ValueError
        If ``interaction_type='covariance'`` and ``estimator`` is neither
        ``'ML'`` nor ``'MCD'`` (previously an UnboundLocalError).
    """
    if interaction_type == 'causal':
        lingam = DirectLiNGAM(prior_knowledge=prior_knowledge,
                              measure=measure).fit(X)
        B = lingam.adjacency_matrix_
        n_features = X.shape[1]
        C = np.zeros([n_features, n_features])
        for d in range(1, n_features):
            C += np.linalg.matrix_power(B, d)
        return B, C

    if interaction_type == 'correlation':
        return np.corrcoef(X.T) - np.eye(X.shape[1])

    if interaction_type == 'covariance':
        if estimator == 'ML':
            est = EmpiricalCovariance(store_precision=True,
                                      assume_centered=False).fit(X)
        elif estimator == 'MCD':
            est = MinCovDet(store_precision=True,
                            assume_centered=False,
                            support_fraction=None).fit(X)
        else:
            raise ValueError(
                "{!r} is not a valid estimator. Must be one of 'ML' or 'MCD'"
                .format(estimator))

        cov = est.covariance_
        # Add a small ridge when the estimate is rank deficient so that it
        # can be inverted.
        if np.linalg.matrix_rank(cov) != X.shape[1]:
            cov += 1e-6 * np.eye(X.shape[1])

        eigvals, eigvecs = np.linalg.eig(np.linalg.inv(cov))
        sqrt_eigvals = np.diag(np.sqrt(eigvals))
        U = eigvecs.dot(sqrt_eigvals).T
        return cov, U

    if interaction_type == 'precomputed':
        df = pd.read_csv(file_name)
        return df.values

    # Unknown interaction types keep returning None, as before.
    return None
def compute_MCD_weft(weftsPickled, target_path):
    """Fit an MCD estimator on weft-point features and pickle it.

    All point lists found in the ``weftsPickled`` files are merged. Points
    closer than 1.5 times ``getMedianWeftDist()`` to the bounding box of the
    merged points are discarded. For the remaining points the features
    ``(area, right_dist, left_dist)`` are collected, rows containing a
    non-positive value are removed, and a ``MinCovDet`` with
    ``support_fraction=0.8`` is fitted and written to ``target_path``.

    Parameters
    ----------
    weftsPickled : iterable of paths
        Pickle files, each holding an iterable of points.
    target_path : path
        Destination of the pickled estimator.
    """
    weft_points_list = floatPointList()
    for pickled_path in weftsPickled:
        # NOTE: unpickling can execute arbitrary code; only feed this
        # function files from a trusted source.
        # `with` closes the handle, which the previous bare open() never did.
        with open(pickled_path, "rb") as pickled_file:
            weft_points_list.extend(pickle.load(pickled_file))

    x_vals = [fp.x for fp in weft_points_list]
    y_vals = [fp.y for fp in weft_points_list]

    # Margin used to discard points near the border of the point cloud.
    margin = 1.5 * weft_points_list.getMedianWeftDist()
    min_x = min(x_vals) + margin
    max_x = max(x_vals) - margin
    min_y = min(y_vals) + margin
    max_y = max(y_vals) - margin

    inner_points = floatPointList()
    for pt in weft_points_list:
        if min_x < pt.x < max_x and min_y < pt.y < max_y:
            inner_points.append(pt)

    # reshape keeps the (0, 3) shape when no inner point survives.
    X = np.array(
        [[pt.area, pt.right_dist, pt.left_dist] for pt in inner_points],
        dtype=float,
    ).reshape(-1, 3)

    # Drop rows with any non-positive feature.
    Y = X[~(X <= 0).any(axis=1)]
    robust_cov = MinCovDet(support_fraction=0.8).fit(Y)

    # Closing the file guarantees the pickle is flushed to disk.
    with open(target_path, "wb") as target_file:
        pickle.dump(robust_cov, target_file)
def _naiveMCD(self, dataset, thresh=3):
    """Detect rows whose MCD-based score is flagged by the error detector.

    The columns typed ``'numerical'`` are featurized, an MCD covariance is
    estimated on a slightly jittered copy, and each row gets the quadratic
    form of its deviation from the column mean under the inverse covariance.
    The scores go through ``ErrorDetector`` with ``QuantitativeErrorModule``
    and threshold ``thresh``.

    Returns the set of ``error['cell'][0]`` values over the detector's errors.
    """
    types = LoLTypeInference().getDataTypes(dataset)
    numeric_positions = [i for i, t in enumerate(types) if t == 'numerical']
    qdataset = [[row[i] for i in numeric_positions] for row in dataset]
    numeric_types = [t for t in types if t == 'numerical']
    X = featurize(qdataset, numeric_types)

    shape = np.shape(X)
    n_rows, n_cols = shape[0], shape[1]

    # for conditioning problems with the estimate
    jittered = X + 0.01 * np.random.randn(n_rows, n_cols)

    estimator = MinCovDet()
    estimator.fit(jittered)

    sigma = np.linalg.inv(estimator.covariance_)
    mu = np.mean(X, axis=0)

    results = [
        [str(np.squeeze((X[i, :] - mu) * sigma * (X[i, :] - mu).T)[0, 0])]
        for i in range(n_rows)
    ]

    detector = ErrorDetector(results,
                             modules=[QuantitativeErrorModule],
                             config=[{'thresh': thresh}])
    detector.fit()

    return {error['cell'][0] for error in detector}
def test_mcd_issue3367():
    # Check that MCD completes when the covariance matrix is singular,
    # i.e. one of its rows and columns is all zeros.
    rng = np.random.RandomState(0)

    # Ten values between -5 and 5, used for both the X and the Y coordinate.
    axis_values = np.linspace(-5, 5, 10).tolist()

    # Cartesian product of the coordinates: every (x, y) pair on the grid.
    grid = np.array(list(itertools.product(axis_values, axis_values)))

    # Append an all-zero third column so that every point lies in a plane,
    # which makes the covariance matrix singular.
    zero_column = np.zeros((grid.shape[0], 1))
    planar_points = np.hstack((grid, zero_column))

    # The test passes as long as the fit below completes without raising.
    #
    # Possible further check: the points are in XYZ, so the eigenvectors of
    # the covariance relate directly to their geometry. For a plane, the
    # eigenvector of the smallest eigenvalue should be the plane normal,
    # here [0, 0, 1] because everything lies in the XY plane:
    #
    #     evals, evecs = np.linalg.eigh(mcd_fit.covariance_)
    #     normal = evecs[:, np.argmin(evals)]
    #
    # `normal` would then be compared with [0, 0, 1] up to a small
    # tolerance (e.g. 1e-12) to allow for floating point error.
    MinCovDet(random_state=rng).fit(planar_points)
def wmean(x, w=None, robust=False):
    '''Weighted mean

    Calculate the mean of x using weights w.

    Args:
        x : array of values to be averaged
        w : array of weights for each element of x; can be omitted if
            robust=True
        robust : (boolean) robust weights will be internally calculated
            using FastMCD; only used if robust=True and w is not given

    Returns:
        scalar : weighted mean, ``sum(x * w) / sum(w)``

    Raises:
        AssertionError : if w and x differ in length
        SystemExit : if no weights are available (w missing or empty and
            robust is False)
    '''
    # Identity test: `w != None` on a NumPy array is an element-wise
    # comparison whose truth value is ambiguous and raises ValueError.
    if w is not None:
        assert len(w) == len(x), 'w must be the same length as x'

    # Use FastMCD to calculate weights; another method could be used here.
    # The support mask of the fit is used as 0/1 weights.
    if robust and w is None:
        w = MinCovDet().fit(np.array([x, x]).T).support_

    if w is None or len(w) == 0:
        raise SystemExit('must specify weights w or select robust=True')

    assert len(w) == len(x), 'w must be the same length as x'

    return np.sum(x * w) / np.sum(w)
def wcov(x, y, w=None, ddof=1, robust=False):
    '''Weighted covariance

    Calculate the covariance of x and y using weights w. If ddof=1
    (default), then the result is the unbiased (sample) covariance when w=1.

    Implements weighted covariance as defined by NIST Dataplot
    (https://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf)

    Args:
        x,y : array of values
        w : array of weights for each element of x; can be omitted if
            robust=True
        ddof : scalar differential degrees of freedom (Default ddof=1)
        robust : (boolean) robust weights will be internally calculated
            using FastMCD; only used if robust=True and w is not given

    Returns:
        scalar : weighted covariance

    Raises:
        AssertionError : if y or w differ in length from x
        SystemExit : if no weights are available (w missing or empty and
            robust is False)
    '''
    n = len(x)
    assert len(y) == n, 'y must be the same length as x'

    # Use FastMCD to calculate weights; another method could be used here.
    # Identity test: `w == None` on a NumPy array is element-wise and its
    # truth value raises ValueError.
    if robust and w is None:
        w = MinCovDet().fit(np.array([x, y]).T).support_

    if w is None or len(w) == 0:
        raise SystemExit('must specify weights w or select robust=True')

    assert len(w) == n, 'w must be the same length as x and y'

    w = wscale(w)
    nw = np.count_nonzero(w)  # only non-zero weights count as observations

    x_dev = x - wmean(x, w)
    y_dev = y - wmean(y, w)
    return np.sum(x_dev * y_dev * w) / (np.sum(w) / nw * (nw - ddof))
def wcorr(x, y, w=None, robust=False):
    '''Weighted correlation coefficient

    Calculate the Pearson linear correlation coefficient of x and y using
    weights w. This is derived from the weighted covariance and weighted
    variance.

    Args:
        x,y : array of values
        w : array of weights for each element of x; when omitted, robust
            weights are calculated internally using FastMCD
        robust : (boolean) accepted for signature parity with the other
            weighted statistics; this function does not read it

    Returns:
        scalar : weighted correlation coefficient

    Raises:
        AssertionError : if y or w differ in length from x
        SystemExit : if w is empty
    '''
    n = len(x)
    assert len(y) == n, 'y must be the same length as x'

    # Use FastMCD to calculate weights; another method could be used here.
    # Identity test: `w == None` on a NumPy array is element-wise and its
    # truth value raises ValueError.
    if w is None:
        w = MinCovDet().fit(np.array([x, y]).T).support_

    if len(w) == 0:
        raise SystemExit('must specify weights w or select robust=True')

    assert len(w) == n, 'w must be the same length as x and y'

    w = wscale(w)
    return wcov(x, y, w) / np.sqrt(wvar(x, w) * wvar(y, w))
def find_outliers_mahalanobis(featMatProjected,
                              extremeness=2.,
                              figsize=[8, 8],
                              saveto=None):
    """Return the indices of outlying rows using a robust Mahalanobis value.

    An MCD estimator is fitted on the first (at most 10) columns of
    ``featMatProjected`` and its ``mahalanobis`` value is computed for every
    row. A row is an outlier when its value is at least
    ``mean(values) + extremeness * std(values)``
    [extreme values = 2, very extreme values = 3, following the
    68-95-99.7 rule].

    A scatter plot of the first two columns, coloured by that value, is
    shown, or saved when ``saveto`` is given.

    Parameters
    ----------
    featMatProjected : 2-D numpy array
        Projected feature matrix (rows = samples, columns = components).
        At least two columns are needed for the plot.
    extremeness : float
        Number of standard deviations above the mean used as threshold.
    figsize : sequence of two numbers
        Figure size handed to matplotlib.
    saveto : str or pathlib.Path, optional
        Output file; the format is taken from its suffix. When omitted the
        figure is displayed interactively.

    Returns
    -------
    numpy.ndarray of int
        Positional indices of the outlying rows (possibly empty).
    """
    import numpy as np
    import seaborn as sns
    from pathlib import Path
    from sklearn.covariance import MinCovDet
    from matplotlib import pyplot as plt

    # NB: Euclidean distance puts more weight than it should on correlated
    # variables. There is also a chicken-and-egg problem: outliers cannot be
    # identified before the distribution statistics are known, but those
    # statistics are skewed by the outliers. A robust covariance estimate
    # is used to get around this.

    # Use the first 10 principal components, or fewer when fewer exist
    # (a narrower matrix previously failed on the hard-coded column labels).
    n_pcs = min(10, featMatProjected.shape[1])
    components = featMatProjected[:, :n_pcs]

    # Fit a Minimum Covariance Determinant (MCD) robust estimator to data
    robust_cov = MinCovDet().fit(components)
    MahalanobisDist = robust_cov.mahalanobis(components)

    plt.ioff() if saveto else plt.ion()
    plt.close('all')
    plt.style.use(CUSTOM_STYLE)
    sns.set_style('ticks')
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_facecolor('#F7FFFF')
    # Colour the first two components by the Mahalanobis value.
    plt.scatter(components[:, 0], components[:, 1], c=MahalanobisDist)
    plt.title('Mahalanobis Distance for Outlier Detection', fontsize=20)
    plt.colorbar()
    ax.grid(False)

    if saveto:
        # Accept plain strings as well as Path objects.
        saveto = Path(saveto)
        saveto.parent.mkdir(exist_ok=True, parents=True)
        suffix = saveto.suffix.strip('.')
        plt.savefig(saveto, format=suffix, dpi=300)
    else:
        plt.show()

    k = np.std(MahalanobisDist) * extremeness
    upper_t = np.mean(MahalanobisDist) + k

    # flatnonzero always yields an integer array, so the result stays usable
    # as an index even when no outlier is found.
    outliers = np.flatnonzero(MahalanobisDist >= upper_t)
    print("Outliers found: %d" % len(outliers))
    return outliers
def fit(self, X, y=None):
    """Fit detector. y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    # Input validation comes first; y is only recorded for class bookkeeping.
    X = check_array(X)
    self._set_n_classes(y)

    # Configure the robust covariance estimator from this detector's
    # hyper-parameters, then fit it on the validated samples.
    hyper_params = {
        'store_precision': self.store_precision,
        'assume_centered': self.assume_centered,
        'support_fraction': self.support_fraction,
        'random_state': self.random_state,
    }
    self.detector_ = MinCovDet(**hyper_params)
    self.detector_.fit(X=X, y=y)

    # The training distances of the estimator are the outlier scores.
    self.decision_scores_ = self.detector_.dist_
    self._process_decision_scores()

    return self
def obtenerOutliersMinCovarianza(self, datosOriginales, datosATestear):
    """Split ``datosATestear`` into outliers and inliers with a robust fit.

    A minimum-covariance-determinant based detector is fitted on
    ``datosOriginales`` and used to label every row of ``datosATestear``.
    The labels and the tested data are handed to
    ``self.getListasOutliersInliers``, whose two results are returned.

    Returns
    -------
    (listaOutliers, listaInliers)
        As produced by ``getListasOutliersInliers``.
    """
    # scikit-learn's MinCovDet exposes no `predict`; EllipticEnvelope is the
    # MCD-based estimator that provides it.
    # NOTE(review): EllipticEnvelope uses its default contamination -
    # confirm that threshold suits this data.
    from sklearn.covariance import EllipticEnvelope

    clf = EllipticEnvelope().fit(datosOriginales)
    resultadoValoresATestear = clf.predict(datosATestear)
    listaOutliers, listaInliers = self.getListasOutliersInliers(
        resultadoValoresATestear, datosATestear)
    return listaOutliers, listaInliers
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc,
                          tol_cov, tol_support):
    """Fit MCD on contaminated Gaussian data and check it against tolerances.

    ``n_outliers`` randomly chosen rows of an ``(n_samples, n_features)``
    standard-normal sample are shifted by +/-5 per feature. The fitted
    location and covariance are compared with the empirical ones of the
    untouched rows, the support size is compared with ``tol_support``, and
    ``mahalanobis`` on the training data is compared with ``dist_``.
    """
    rng = np.random.RandomState(0)
    data = rng.randn(n_samples, n_features)

    # Contaminate a random subset of the rows.
    contaminated_rows = rng.permutation(n_samples)[:n_outliers]
    shift = 10. * (rng.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[contaminated_rows] += shift

    clean_mask = np.ones(n_samples).astype(bool)
    clean_mask[contaminated_rows] = False
    clean_data = data[clean_mask]

    # Compute MCD by fitting an object.
    mcd_fit = MinCovDet(random_state=rng).fit(data)
    location = mcd_fit.location_
    covariance = mcd_fit.covariance_
    support = mcd_fit.support_

    # Compare with the estimates learnt from the inliers.
    location_mse = np.mean((clean_data.mean(0) - location) ** 2)
    assert (location_mse < tol_loc)

    covariance_mse = np.mean(
        (empirical_covariance(clean_data) - covariance) ** 2)
    assert (covariance_mse < tol_cov)

    assert (np.sum(support) >= tol_support)
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_)
def test_mcd_issue1127():
    # Regression check: fitting must not break when X.shape == (3, 1),
    # i.e. when n_support = n_samples.
    rng = np.random.RandomState(0)
    samples = rng.normal(size=(3, 1))
    MinCovDet().fit(samples)
def _h_getMahalanobisRobust(dat, critical_alpha=0.01, good_rows=np.zeros(0)):
    '''Calculate the robust Mahalanobis distance of every row of `dat`.

    An MCD estimator is fitted on the rows selected by `good_rows` (by
    default, rows with at least one non-NaN entry) and used to score all
    rows of `dat`. Rows whose distance exceeds the critical value derived
    from the chi-square distribution are flagged.

    Parameters
    ----------
    dat : 2-D numpy array (n_samples, n_features)
    critical_alpha : float
        Upper-tail probability used for the critical distance.
    good_rows : boolean mask / index array, optional
        Rows used to fit the estimator. An empty array (the default) means
        "select automatically".

    Returns
    -------
    mahalanobis_dist : numpy.ndarray
        Distance per row (all zeros when the fit raised ValueError).
    outliers : numpy.ndarray of bool
        True where the distance exceeds `maha_lim`.
    maha_lim : float
        Critical distance, in the same (square-rooted) units as
        `mahalanobis_dist`.

    Raises
    ------
    AssertionError
        If the rows selected for fitting still contain NaN.
    '''
    if good_rows.size == 0:
        good_rows = np.any(~np.isnan(dat), axis=1)

    try:
        dat2fit = dat[good_rows]
        assert not np.any(np.isnan(dat2fit))

        robust_cov = MinCovDet().fit(dat2fit)
        # `mahalanobis` yields squared distances; take the root.
        mahalanobis_dist = np.sqrt(robust_cov.mahalanobis(dat))
    except ValueError:
        # This step fails when the covariance matrix is singular. That
        # happens if the data is not a unimodal symmetric distribution,
        # for example when there are too many small noisy particles.
        # The safe option is to return zeros as distances in that case.
        mahalanobis_dist = np.zeros(dat.shape[0])

    # Critical value of the Mahalanobis distance from the chi-square
    # distribution. The chi-square quantile applies to the SQUARED
    # distance, so its root is taken to match `mahalanobis_dist`
    # (previously the unrooted quantile was compared with a rooted
    # distance).
    # https://en.wikiversity.org/wiki/Mahalanobis%27_distance
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html
    maha_lim = np.sqrt(chi2.ppf(1 - critical_alpha, dat.shape[1]))
    outliers = mahalanobis_dist > maha_lim

    return mahalanobis_dist, outliers, maha_lim
def test_mcd_increasing_det_warning():
    # A warning must be raised when the determinant increases during the
    # c_step. In theory the sequence of determinants is decreasing; an
    # increase is likely due to ill-conditioned covariance matrices that
    # result in poor precision matrices.
    X = [
        [5.1, 3.5, 1.4, 0.2],
        [4.9, 3.0, 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5.0, 3.6, 1.4, 0.2],
        [4.6, 3.4, 1.4, 0.3],
        [5.0, 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3.0, 1.4, 0.1],
        [4.3, 3.0, 1.1, 0.1],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [4.6, 3.6, 1.0, 0.2],
        [5.0, 3.0, 1.6, 0.2],
        [5.2, 3.5, 1.5, 0.2],
    ]

    estimator = MinCovDet(random_state=1)
    with pytest.warns(RuntimeWarning, match="Determinant has increased"):
        estimator.fit(X)
def __init__(self):
    """Load the noise spectrum, fit the MCD model and set up ROS I/O.

    The "no action" (noise) data is read from ``train_data/noise.npy``
    inside the ``decopin_hand`` package (the directory is created when
    missing). The process exits after logging an error if the file does not
    exist. The data is subsampled to roughly 100 rows and used to fit a
    ``MinCovDet``. Finally the ``~in_action`` publisher and the
    ``~raw_spectrogram`` subscriber are created.
    """
    # Config for loading no action spectrum (noise data)
    rospack = rospkg.RosPack()
    self.train_dir = osp.join(rospack.get_path('decopin_hand'), 'train_data')
    if not osp.exists(self.train_dir):
        makedirs(self.train_dir)
    self.noise_data_path = osp.join(self.train_dir, 'noise.npy')
    if not osp.exists(self.noise_data_path):
        rospy.logerr('{} is not found. Exit.'.format(self.noise_data_path))
        exit()
    no_action_data = np.load(self.noise_data_path)

    # Extract about 100 rows from no_action_data. Floor division keeps the
    # slice step an integer; true division yields a float on Python 3 and
    # makes the slice raise TypeError.
    divide = max(1, len(no_action_data) // 100)
    no_action_data = no_action_data[::divide]

    # Detect in action or not by mahalanobis distance
    self.anormal_threshold = rospy.get_param('~anormal_threshold')
    self.mcd = MinCovDet()
    self.mcd.fit(no_action_data)
    rospy.loginfo('Calc covariance matrix for Mahalanobis distance')

    # ROS
    self.bridge = CvBridge()
    self.pub = rospy.Publisher('~in_action', Bool, queue_size=1)
    self.sub = rospy.Subscriber('~raw_spectrogram', Image, self.cb)
def portfolio_covariance(r, method='normal'):
    """Return the covariance of ``r`` scaled by ``period_adjustment``.

    Parameters
    ----------
    r : object exposing ``.cov()`` (e.g. a pandas DataFrame of returns)
        Also handed to the scikit-learn estimators for the other methods.
    method : {'normal', 'mcd', 'mest'}
        ``'normal'`` uses ``r.cov()``; ``'mcd'`` uses
        ``MinCovDet(random_state=0)``; ``'mest'`` uses
        ``EmpiricalCovariance``.

    Returns
    -------
    The selected covariance estimate multiplied by the module-level
    ``period_adjustment``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names (previously an
        UnboundLocalError at the return statement).
    """
    if method == 'normal':
        return r.cov() * period_adjustment
    if method == 'mcd':
        return MinCovDet(random_state=0).fit(r).covariance_ * period_adjustment
    if method == 'mest':
        return EmpiricalCovariance().fit(r).covariance_ * period_adjustment
    raise ValueError(
        "{!r} is not a valid method. Must be one of 'normal', 'mcd' or 'mest'"
        .format(method))
def MCD_ano_score():
    """Print MCD Mahalanobis scores for the anomalous and normal test sets.

    An MCD estimator is fitted on the module-level ``train_normal`` and
    evaluated on the module-level ``test_ano`` and ``test_normal``. Nothing
    is returned.
    """
    print("マハラノビス距離(each MCD) ano score")
    mcd = MinCovDet()
    mcd.fit(train_normal)
    # The anomaly score comes from the anomalous set and the normal score
    # from the normal set; the two were previously swapped, so each value
    # was printed under the wrong label.
    mcd_anoscore = mcd.mahalanobis(test_ano)
    mcd_normalscore = mcd.mahalanobis(test_normal)
    print("mcd ano score {} mcd normal score {}".format(
        mcd_anoscore, mcd_normalscore))
def mahalanobis_calculate(data, num_pcs):
    """Return the robust Mahalanobis value of each row in PCA score space.

    ``data`` is projected with ``PCA(num_pcs)``; an MCD estimator is fitted
    on the scores and its ``mahalanobis`` output for those same scores is
    returned.
    """
    scores = PCA(num_pcs).fit_transform(data)

    # Robust (MCD) covariance estimate of the scores.
    estimator = MinCovDet().fit(scores)

    return estimator.mahalanobis(scores)
def as7262_outliers(data, scatter_correction=None):
    """Print and plot the most outlying AS7262 spectrum in ``data``.

    The ``as7262_wavelengths`` columns are selected, optionally
    scatter-corrected, and scored with the Mahalanobis value of an MCD
    estimator. The scores are standardised, and the row with the largest
    standardised score is printed and drawn in red over all spectra (black)
    and the mean spectrum (blue). A histogram of the standardised scores is
    shown as well. Nothing is returned.

    Parameters
    ----------
    data : pandas DataFrame containing the ``as7262_wavelengths`` columns.
    scatter_correction : {None, "SNV", "MSC"}
        Optional correction applied through ``processing.snv`` or
        ``processing.msc``.
    """
    data_columns = data[as7262_wavelengths]
    print(data_columns)
    # Kept from the original flow; nothing has been drawn by this function
    # at this point.
    plt.show()

    if scatter_correction == "SNV":
        data_columns = processing.snv(data_columns)
    elif scatter_correction == "MSC":
        data_columns, _ = processing.msc(data_columns)

    robust_cov = MinCovDet().fit(data_columns)
    mahal_dist = robust_cov.mahalanobis(data_columns)
    print(mahal_dist)

    print('+++++')
    mean = np.mean(mahal_dist)
    sigma = np.std(mahal_dist)
    three_sigma = 3 * sigma
    print(mean, three_sigma)
    print(mean - three_sigma, mean + three_sigma)

    zscore_mahal = (mahal_dist - mean) / sigma

    print('pppp')
    print(data_columns)
    worst_position = zscore_mahal.argmax()
    print(worst_position)

    # Only the single most extreme row is reported. The earlier
    # `zscore_mahal > 3` selection was immediately overwritten and never
    # had any effect.
    outliers = data_columns.iloc[worst_position].name
    rows = data_columns.loc[outliers]

    print(data_columns.shape)
    print(rows)

    plt.hist(zscore_mahal, bins=40)
    plt.show()

    plt.plot(data_columns.T, c="black")
    plt.plot(rows.T, c="red")
    plt.plot(data_columns.mean(), c="blue", lw=4)
    plt.show()
def detect(train_data: np.ndarray, test_data: np.ndarray) -> list:
    """Label test rows by comparing them with the largest training value.

    An MCD estimator is fitted on ``train_data``. A test row gets ``0`` when
    its ``mahalanobis`` value is at most the maximum value observed on the
    training data, and ``1`` otherwise.
    """
    estimator = MinCovDet().fit(train_data)
    ceiling = np.max(estimator.mahalanobis(train_data))

    labels = []
    for distance in estimator.mahalanobis(test_data):
        if distance <= ceiling:
            labels.append(0)
        else:
            labels.append(1)
    return labels
def __init__(self, cov_estimator=None, threshold=None):
    """Store the covariance estimator and the threshold.

    Parameters
    ----------
    cov_estimator : EmpiricalCovariance instance, optional
        Covariance estimator to use. When omitted, a fresh ``MinCovDet()``
        is created for this object.
    threshold : optional
        Stored unchanged on ``self.threshold``.

    Raises
    ------
    TypeError
        If ``cov_estimator`` is not an ``EmpiricalCovariance`` instance.
    """
    # A default built in the signature is created once and shared by every
    # instance, so fitting one object would silently change the others.
    # Build the default per instance instead.
    if cov_estimator is None:
        cov_estimator = MinCovDet()

    if not isinstance(cov_estimator, EmpiricalCovariance):
        raise TypeError(
            "Estimator must be a sklearn.covariance.EmpiricalCovariance class"
        )

    self.cov_estimator = cov_estimator
    self.threshold = threshold
    self.attr_to_check = ["mahal_dist_"]
def calc_robust_covariance_matrix(data_row,
                                  weights=None,
                                  centered=True,
                                  random_state=None):
    """Return the MCD covariance estimate of ``data_row``.

    When ``weights`` is given, the data first goes through
    ``inflate_data_using_weights(data_row, weights)``. ``centered`` is
    forwarded as ``assume_centered`` and ``random_state`` as-is to
    ``MinCovDet``.
    """
    if weights is None:
        samples = data_row
    else:
        samples = inflate_data_using_weights(data_row, weights)

    estimator = MinCovDet(assume_centered=centered,
                          random_state=random_state)
    return estimator.fit(samples).covariance_
def __init__(self, method='robust', estimator_kw_args=None):
    """Create the covariance estimator selected by ``method``.

    Parameters
    ----------
    method : {'robust', 'empirical'}
        ``'robust'`` builds a ``MinCovDet``, ``'empirical'`` an
        ``EmpiricalCovariance``.
    estimator_kw_args : dict, optional
        Keyword arguments forwarded to the estimator constructor. Omitting
        it (or passing ``None``) forwards no extra arguments.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    # Avoid a shared mutable default.
    if estimator_kw_args is None:
        estimator_kw_args = {}

    # Compare by value: `is` tests object identity and only appears to work
    # for strings that happen to be interned.
    if method == 'robust':
        self.covariance_estimator_ = MinCovDet(**estimator_kw_args)
    elif method == 'empirical':
        self.covariance_estimator_ = EmpiricalCovariance(
            **estimator_kw_args)
    else:
        raise ValueError(
            "{} is not a valid method. Must be one of 'robust' or 'empirical'"
            .format(method))
def mahal_plot(e):
    """Plot lag-1 pairs of ``e`` with empirical and robust distance contours.

    Consecutive values ``(e[i], e[i + 1])`` for ``i = 1 .. len(e) - 2`` are
    used as 2-D points. An MCD estimator and an empirical covariance are
    fitted on them, and the square roots of their ``mahalanobis`` values are
    drawn as contours over a scatter plot of the points. The correlation
    matrix of the two lagged series is printed.

    Returns
    -------
    (robust_cov, emp_cov)
        The fitted ``MinCovDet`` and ``EmpiricalCovariance`` estimators.
    """
    # Despite the names, these are the series and its one-step-ahead copy,
    # not two halves of it.
    first_half = e[1:len(e) - 1]
    second_half = e[2:len(e)]
    X = np.transpose(np.array([first_half, second_half]))

    # Fit a Minimum Covariance Determinant (MCD) robust estimator to data
    robust_cov = MinCovDet().fit(X)
    # Empirical (non-robust) estimate for comparison
    emp_cov = EmpiricalCovariance().fit(X)

    plt.figure()

    # Show data set
    subfig1 = plt.subplot(1, 1, 1)
    inlier_plot = subfig1.scatter(first_half,
                                  second_half,
                                  color='black',
                                  label='daily diff in homes passed')
    subfig1.set_title("Mahalanobis distances of the iid invariants:")

    # Show contours of the distance functions over the current axis limits
    xx, yy = np.meshgrid(
        np.linspace(plt.xlim()[0], plt.xlim()[1], 800),
        np.linspace(plt.ylim()[0], plt.ylim()[1], 100))
    zz = np.c_[xx.ravel(), yy.ravel()]

    mahal_emp_cov = emp_cov.mahalanobis(zz).reshape(xx.shape)
    emp_cov_contour = subfig1.contour(xx,
                                      yy,
                                      np.sqrt(mahal_emp_cov),
                                      cmap=plt.cm.PuBu_r,
                                      linestyles='dashed')

    mahal_robust_cov = robust_cov.mahalanobis(zz).reshape(xx.shape)
    # `contour` takes `linewidths` (plural, numeric); the former
    # `linewidth="3"` and `color='red'` are not contour arguments, and a
    # single colour cannot be combined with `cmap` anyway.
    robust_contour = subfig1.contour(xx,
                                     yy,
                                     np.sqrt(mahal_robust_cov),
                                     cmap=plt.cm.YlOrBr_r,
                                     linewidths=3)

    # NOTE(review): `ContourSet.collections` is deprecated in recent
    # Matplotlib releases - confirm against the pinned version.
    subfig1.legend([
        emp_cov_contour.collections[1], robust_contour.collections[1],
        inlier_plot
    ], ['MLE dist', 'robust dist', 'kpis'],
                   loc="upper right",
                   borderaxespad=0)

    print(np.corrcoef(first_half, second_half))
    return (robust_cov, emp_cov)