def _compute_matrix_profile(self): """ Compute the matrix profile using STOMP. """ mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size) QT = utils.sliding_dot_product(self.ts2[:self.window_size], self.ts1) if self._same_ts: mu_Q, sigma_Q = mu_T, sigma_T TQ = np.copy(QT) else: mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size) TQ = utils.sliding_dot_product(self.ts1[:self.window_size], self.ts2) D = utils.calculate_distance_profile(QT, self.window_size, mu_Q[0], sigma_Q[0], mu_T, sigma_T) if self._same_ts: lower_ez_bound = 0 upper_ez_bound = min(len(self.ts2), self.exclusion_zone) + 1 D[lower_ez_bound:upper_ez_bound] = np.inf self._matrix_profile = np.copy(D) self._index_profile = np.zeros((len(self.ts1) - self.window_size + 1,)) for idx in self._iterator: QT[1:] = QT[:len(self.ts1)-self.window_size] - self.ts1[:len(self.ts1)-self.window_size] * self.ts2[idx-1] \ + self.ts1[self.window_size:] * self.ts2[idx + self.window_size - 1] QT[0] = TQ[idx] D = utils.calculate_distance_profile(QT, self.window_size, mu_Q[idx], sigma_Q[idx], mu_T, sigma_T) self._elementwise_min(D, idx)
def _compute_matrix_profile(self): """ Compute the matrix profile using PreSCRIMP. """ try: mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size) if self._same_ts: mu_Q, sigma_Q = mu_T, sigma_T else: mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size) for n_iter, idx in enumerate(self._iterator): D = utils.mass(self.ts2[idx: idx+self.window_size], self.ts1) self._elementwise_min(D, idx) jdx = np.argmin(D) # the index of closest profile to the current idx # compute diagonals until the next sampled point q1 = self.ts2[idx:idx + self.sample_interval + self.window_size - 1] q2 = self.ts1[jdx:jdx + self.sample_interval + self.window_size - 1] lq = min(len(q1), len(q2)) q = q1[:lq] * q2[:lq] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[idx:idx + len(q)], sigma_Q[idx:idx + len(q)], mu_T[jdx:jdx + len(q)], sigma_T[jdx:jdx + len(q)]) self._index_profile[jdx: jdx + len(q)] = np.where(D < self._matrix_profile[jdx:jdx + len(q)], np.arange(idx, idx + len(q)), self._index_profile[jdx:jdx + len(q)]) self._matrix_profile[jdx:jdx + len(q)] = np.minimum(D, self._matrix_profile[jdx:jdx + len(q)]) if self._same_ts: self._index_profile[idx:idx + len(q)] = np.where(D < self._matrix_profile[idx:idx + len(q)], np.arange(jdx, jdx + len(q)), self._index_profile[idx:idx + len(q)]) self._matrix_profile[idx:idx + len(q)] = np.minimum(D, self._matrix_profile[idx:idx + len(q)]) # compute diagonals until the previous sampled point if idx != 0 and jdx != 0: q1 = self.ts2[max(0, idx - self.sample_interval):(idx + self.window_size - 1)] q2 = self.ts1[max(0, jdx - self.sample_interval):(jdx + self.window_size - 1)] lq = min(len(q1), len(q2)) q = q1[-lq:] * q2[-lq:] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[idx - len(q):idx], sigma_Q[idx - len(q):idx], mu_T[jdx - len(q):jdx], sigma_T[jdx - len(q):jdx]) self._index_profile[jdx - len(q): jdx] = np.where(D < self._matrix_profile[jdx - len(q):jdx], np.arange(idx - len(q), idx), self._index_profile[jdx - len(q):jdx]) self._matrix_profile[jdx - len(q):jdx] = np.minimum(D, self._matrix_profile[jdx - len(q):jdx]) if self._same_ts: self._index_profile[idx - len(q):idx] = np.where(D < self._matrix_profile[idx - len(q):idx], np.arange(jdx - len(q), jdx), self._index_profile[idx - len(q):idx]) self._matrix_profile[idx - len(q):idx] = np.minimum(D, self._matrix_profile[idx - len(q):idx]) except KeyboardInterrupt: if self.verbose: tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
def test_calculate_distance_profile_data1(self): qt = np.loadtxt("./data/random_walk_data_sdp.csv") rolling_mean = np.loadtxt("./data/random_walk_data_rolling_mean.csv") rolling_std = np.loadtxt("./data/random_walk_data_rolling_std.csv") m = 1000 dp = utils.calculate_distance_profile(qt, m, rolling_mean[0], rolling_std[0], rolling_mean, rolling_std) ans = np.loadtxt("./data/random_walk_data_distance_profile.csv") assert len(dp) == len(qt), "mass_data1: result should have correct length" assert np.allclose(dp, ans), "calculate_distance_profile_data1: distance profile should be computer correctly"
def test_calculate_distance_profile_constant_sequence_and_query(self): n = 100 m = np.random.randint(10, n // 2) t = np.full(n, np.random.rand()) q = np.full(m, np.random.rand()) qt = utils.sliding_dot_product(q, t) rolling_mean, rolling_std = utils.rolling_avg_sd(t, m) dp = utils.calculate_distance_profile(qt, m, np.mean(q), np.std(q), rolling_mean, rolling_std) assert np.allclose(dp, np.full(n - m + 1, 0)), "calculate_distance_profile_constant_sequence_and_query: " \ "distance of constant query to constant sequence is ero by definition."
def test_calculate_distance_profile_constant_query(self): n = 100 m = np.random.randint(10, n // 2) t = np.random.rand(n) q = np.full(m, np.random.rand()) qt = utils.sliding_dot_product(q, t) rolling_mean, rolling_std = utils.rolling_avg_sd(t, m) dp = utils.calculate_distance_profile(qt, m, q[0], 0, rolling_mean, rolling_std) assert np.allclose(dp, np.full(n - m + 1, np.sqrt(m))), "calculate_distance_profile_constant_query: " \ "distance of nonconstant sequence to constant query is sqrt(m) by definition."
def _compute_matrix_profile(self): """ Compute the matrix profile using SCRIMP. """ if self.verbose and self.pre_scrimp > 0: tqdm.write("SCRIMP:") try: n1 = len(self.ts1) n2 = len(self.ts2) mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size) if self._same_ts: mu_Q, sigma_Q = mu_T, sigma_T else: mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size) for n_iter, k in enumerate(self._iterator): if k >= 0: # compute diagonals starting from a slot in first column q = self.ts2[k:k+n1] * self.ts1[:n2-k] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[k:k+len(q)], sigma_Q[k:k+len(q)], mu_T[:len(q)], sigma_T[:len(q)]) self._index_profile[:len(q)] = np.where(D < self._matrix_profile[:len(q)], np.arange(k, k + len(q)), self._index_profile[:len(q)]) self._matrix_profile[:len(q)] = np.minimum(D, self._matrix_profile[:len(q)]) if self._same_ts: self._index_profile[k:k+len(q)] = np.where(D < self._matrix_profile[k:k+len(q)], np.arange(len(q)), self._index_profile[k:k+len(q)]) self._matrix_profile[k:k+len(q)] = np.minimum(D, self._matrix_profile[k:k+len(q)]) else: # compute diagonals starting from a slot in first row k = -k q = self.ts2[:n1-k] * self.ts1[k:k+n2] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[:len(q)], sigma_Q[:len(q)], mu_T[k:k+len(q)], sigma_T[k:k+len(q)]) self._index_profile[k:k+len(q)] = np.where(D < self._matrix_profile[k:k+len(q)], np.arange(len(q)), self._index_profile[k:k+len(q)]) self._matrix_profile[k:k+len(q)] = np.minimum(D, self._matrix_profile[k:k+len(q)]) except KeyboardInterrupt: if self.verbose: tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
def test_calculate_distance_profile_invalid_rolling_std(self): with pytest.raises(ValueError): qt = np.random.rand(100) rolling_mean = np.random.rand(100) rolling_std = np.random.rand(101) dp = utils.calculate_distance_profile(qt, 10, rolling_mean[0], rolling_std[0], rolling_mean, rolling_std)