def test_get_imfs_and_trend(self):
    emd = EMD()

    T = np.linspace(0, 2 * np.pi, 100)
    expected_trend = 5 * T
    S = 2 * np.sin(4.1 * 6.28 * T) + 1.2 * np.cos(7.4 * 6.28 * T) + expected_trend

    all_imfs = emd(S)
    imfs, trend = emd.get_imfs_and_trend()

    onset_trend = trend - trend.mean()
    onset_expected_trend = expected_trend - expected_trend.mean()

    self.assertEqual(all_imfs.shape[0], imfs.shape[0] + 1, "Compare number of components")
    self.assertTrue(np.array_equal(all_imfs[:-1], imfs), "Shouldn't matter where imfs are from")
    self.assertTrue(
        np.allclose(onset_trend, onset_expected_trend, rtol=0.1, atol=0.5),
        "Extracted trend should be close to the actual trend",
    )
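# A sketch of a companion check, not part of the original test suite: it exercises the analogous
# accessor on the EEMD class defined below and verifies that the stored ensemble IMFs and residue
# sum back to the input signal. The trial count, seed, and synthetic signal are assumptions.
def test_get_imfs_and_residue_eemd_sketch(self):
    from PyEMD import EEMD  # assumed import path, matching the class below

    T = np.linspace(0, 2 * np.pi, 100)
    S = np.sin(5 * T) + 0.5 * T

    eemd = EEMD(trials=10)
    eemd.noise_seed(0)  # fix the noise so the sketch is reproducible
    e_imfs = eemd(S, T)
    imfs, residue = eemd.get_imfs_and_residue()

    self.assertTrue(np.array_equal(e_imfs, imfs), "Accessor should return the same ensemble IMFs")
    self.assertTrue(
        np.allclose(S, imfs.sum(axis=0) + residue),
        "Ensemble IMFs plus residue should reconstruct the signal",
    )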
import logging
from collections import defaultdict
from multiprocessing import Pool
from typing import Dict, Optional, Sequence, Tuple, Union

import numpy as np


class EEMD:
    r"""
    **Ensemble Empirical Mode Decomposition**

    Ensemble empirical mode decomposition (EEMD) [Wu2009]_
    is a noise-assisted technique, which is meant to be more robust
    than simple Empirical Mode Decomposition (EMD). The robustness is
    achieved by performing many decompositions on signals slightly
    perturbed from their initial position. In the grand average over
    all IMF results the noise cancels out and what remains is the
    pure decomposition.

    Parameters
    ----------
    trials : int (default: 100)
        Number of trials, i.e. EMD executions with added noise.
    noise_width : float (default: 0.05)
        Standard deviation of the Gaussian noise (:math:`\hat\sigma`).
        It's relative to the absolute amplitude of the signal, i.e.
        :math:`\hat\sigma = \sigma\cdot|\max(S)-\min(S)|`, where
        :math:`\sigma` is `noise_width`.
    ext_EMD : EMD (default: None)
        One can pass an EMD object defined outside, which will be
        used to compute IMF decompositions in each trial. If none
        is passed then EMD with default options is used.
    parallel : bool (default: False)
        Flag whether to use multiprocessing in EEMD execution.
        Since each EMD(s + noise) is independent, this should improve execution
        speed considerably. *Note* that it's disabled by default because
        misconfigured multiprocessing is the most common reason for EEMD taking
        too long to finish. If you set the flag to True, also make sure to set
        `processes` to some reasonable value.
    processes : int or None (optional)
        Number of processes to harness when executing in parallel mode.
        The value should be between 1 and the maximum supported by your hardware.
    separate_trends : bool (default: False)
        Flag whether to isolate trends from each EMD decomposition into
        a separate component. If `True`, the resulting EEMD will contain an
        ensemble built only from IMFs and the mean residue will be stacked
        as the last element.

    References
    ----------
    .. [Wu2009] Z. Wu and N. E. Huang, "Ensemble empirical mode decomposition:
        A noise-assisted data analysis method", Advances in Adaptive
        Data Analysis, Vol. 1, No. 1 (2009) 1-41.
    """

    logger = logging.getLogger(__name__)

    noise_kinds_all = ["normal", "uniform"]

    def __init__(self, trials: int = 100, noise_width: float = 0.05, ext_EMD=None, parallel: bool = False, **kwargs):
        # Ensemble constants
        self.trials = trials
        self.noise_width = noise_width
        self.separate_trends = bool(kwargs.get("separate_trends", False))

        self.random = np.random.RandomState()
        self.noise_kind = kwargs.get("noise_kind", "normal")
        self.parallel = parallel
        self.processes = kwargs.get("processes")  # Optional[int]
        if self.processes is not None and not self.parallel:
            self.logger.warning("Passed value for `processes` has no effect when `parallel` is False.")

        if ext_EMD is None:
            from PyEMD import EMD

            self.EMD = EMD(**kwargs)
        else:
            self.EMD = ext_EMD

        self.E_IMF = None  # Optional[np.ndarray]
        self.residue = None  # Optional[np.ndarray]

    def __call__(self, S: np.ndarray, T: Optional[np.ndarray] = None, max_imf: int = -1) -> np.ndarray:
        return self.eemd(S, T=T, max_imf=max_imf)

    def __getstate__(self) -> Dict:
        self_dict = self.__dict__.copy()
        if "pool" in self_dict:
            del self_dict["pool"]
        return self_dict

    def generate_noise(self, scale: float, size: Union[int, Sequence[int]]) -> np.ndarray:
        """
        Generate noise with specified parameters.
        Currently supported distributions are:

        * *normal* with standard deviation equal to `scale`.
        * *uniform* with range [-scale/2, scale/2].

        Parameters
        ----------
        scale : float
            Width for the distribution.
        size : int or sequence of int
            Shape of the generated noise.

        Returns
        -------
        noise : numpy array
            Noise sampled from the selected distribution.
""" if self.noise_kind == "normal": noise = self.random.normal(loc=0, scale=scale, size=size) elif self.noise_kind == "uniform": noise = self.random.uniform(low=-scale / 2, high=scale / 2, size=size) else: raise ValueError( "Unsupported noise kind. Please assigned `noise_kind`" + " to be one of these: " + str(self.noise_kinds_all)) return noise def noise_seed(self, seed: int) -> None: """Set seed for noise generation.""" self.random.seed(seed) def eemd(self, S: np.ndarray, T: Optional[np.ndarray] = None, max_imf: int = -1) -> np.ndarray: """ Performs EEMD on provided signal. For a large number of iterations defined by `trials` attr the method performs :py:meth:`emd` on a signal with added white noise. Parameters ---------- S : numpy array, Input signal on which EEMD is performed. T : numpy array or None, (default: None) If none passed samples are numerated. max_imf : int, (default: -1) Defines up to how many IMFs each decomposition should be performed. By default (negative value) it decomposes all IMFs. Returns ------- eIMF : numpy array Set of ensemble IMFs produced from input signal. In general, these do not have to be, and most likely will not be, same as IMFs produced using EMD. """ if T is None: T = np.arange(len(S), dtype=S.dtype) scale = self.noise_width * np.abs(np.max(S) - np.min(S)) self._S = S self._T = T self._N = N = len(S) self._scale = scale self.max_imf = max_imf # For trial number of iterations perform EMD on a signal # with added white noise if self.parallel: pool = Pool(processes=self.processes) all_IMFs = pool.map(self._trial_update, range(self.trials)) pool.close() else: # Not parallel all_IMFs = map(self._trial_update, range(self.trials)) proto_eimf = defaultdict(lambda: np.zeros(N)) proto_count = defaultdict(int) for (imfs, trend) in all_IMFs: # A bit of explanation here. # If the `trend` is not None, that means it was intentionally separated in the decomp process. # This might due to `separate_trends` flag which means that trends are summed up separately # and treated as the last component. Since `proto_eimfs` is a dict, that `-1` is treated literally # and **not** as the *last position*. We can then use that `-1` to always add it as the last pos # in the actual eIMF, which indicates the trend. if trend is not None: proto_eimf[-1] += trend proto_count[-1] += 1 for imf_num, imf in enumerate(imfs): proto_eimf[imf_num] += imf proto_count[imf_num] += 1 self.E_IMF = np.zeros((len(proto_eimf), N)) # *Note*: We are diving by proto-imf count per order, rather than blantly by num of trails. # This means we're calculating a mean over all observations rather. for imf_num, imf in proto_eimf.items(): self.E_IMF[imf_num] += imf / proto_count[imf_num] self.residue = S - np.sum(self.E_IMF, axis=0) return self.E_IMF def _trial_update(self, trial) -> Tuple[np.ndarray, Optional[np.ndarray]]: """A single trial evaluation, i.e. EMD(signal + noise). *Note*: Although `trial` argument isn't used it's needed for the (multiprocessing) map method. """ noise = self.generate_noise(self._scale, self._N) imfs = self.emd(self._S + noise, self._T, self.max_imf) trend = None if self.separate_trends: imfs, trend = self.EMD.get_imfs_and_trend() return (imfs, trend) def emd(self, S: np.ndarray, T: np.ndarray, max_imf: int = -1) -> np.ndarray: """Vanilla EMD method. Provides emd evaluation from provided EMD class. For reference please see :class:`PyEMD.EMD`. 
""" return self.EMD.emd(S, T, max_imf) def get_imfs_and_residue(self) -> Tuple[np.ndarray, np.ndarray]: """ Provides access to separated imfs and residue from recently analysed signal. Returns ------- (imfs, residue) : (np.ndarray, np.ndarray) Tuple that contains all imfs and a residue (if any). """ if self.E_IMF is None or self.residue is None: raise ValueError( 'No IMF found. Please, run EMD method or its variant first.') return self.E_IMF, self.residue