def test_mass_data1(self):
    """
    Check ``utils.mass`` against a stored reference distance profile.

    The query is the first 1000 points of the random-walk series; the
    result must have one entry per window position and match the reference
    file to within an absolute tolerance of 1e-5.
    """
    series = np.loadtxt("./data/random_walk_data.csv")
    query = series[:1000]

    profile = utils.mass(query, series)
    expected = np.loadtxt("./data/random_walk_data_distance_profile.csv")

    # One distance per possible placement of the query inside the series.
    expected_len = len(series) - len(query) + 1
    assert len(profile) == expected_len, "mass_data1: result should have correct length"
    assert np.allclose(profile, expected, atol=1e-5), "mass_data1: distance profile should be computed correctly"
def test_mass_sanity(self):
    """
    Check that a subsequence has (near) zero distance to itself.

    A random window ``t[k:k+m]`` is cut out of a random series and used as
    the query; the distance profile entry at ``k`` must be below 1e-5.
    """
    t = np.random.rand(1000)
    # Keep the window at least 10 points long: an unbounded draw can return
    # 0 or 1, which yields an empty or constant query and makes the test
    # fail intermittently. The bound of 10 mirrors test_mass_random_data.
    m = np.random.randint(10, len(t) - 10)
    # m <= len(t) - 11, so the range [10, len(t) - m) is never empty and
    # k + m always stays inside the series.
    k = np.random.randint(10, len(t) - m)
    q = t[k:k+m]

    dp = utils.mass(q, t)
    assert dp[k] < 1e-5, "test_mass_sanity: distance of a series to itself should be zero"
def test_mass_random_data(self):
    """
    Compare ``utils.mass`` with the naive reference implementation.

    Series length and query length are drawn at random (series length in
    [100, 1000), query length in [10, series length)), and the two distance
    profiles must agree within ``np.allclose`` default tolerances.
    """
    series_len = np.random.randint(100, 1000)
    query_len = np.random.randint(10, series_len)
    query = np.random.rand(query_len)
    series = np.random.rand(series_len)

    fast_profile = utils.mass(query, series)
    reference_profile = helpers.naive_distance_profile(query, series)

    assert np.allclose(fast_profile, reference_profile), "mass_random_data: distance profile should be computed correctly"
def _compute_matrix_profile(self):
    """
    Compute the matrix profile using STAMP.

    For every index yielded by ``self._iterator``, the distance profile of
    the window ``self.ts2[idx:idx + window_size]`` against ``self.ts1`` is
    computed with ``utils.mass`` and handed to ``self._elementwise_min``
    together with the index.

    A ``KeyboardInterrupt`` stops the loop early without propagating; when
    ``self.verbose`` is set, a notice with the last iteration number is
    written through ``tqdm.write``.
    """
    # Bind the counter before entering the loop so the interrupt handler can
    # always format its message, even if the interrupt arrives before the
    # first iteration has assigned it.
    n_iter = 0
    try:
        for n_iter, idx in enumerate(self._iterator):
            D = utils.mass(self.ts2[idx: idx+self.window_size], self.ts1)
            self._elementwise_min(D, idx)
    except KeyboardInterrupt:
        if self.verbose:
            tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
def _compute_matrix_profile(self):
    """
    Compute the matrix profile using PreSCRIMP.

    For each sampled index ``idx`` yielded by ``self._iterator``:

    1. The distance profile of the window starting at ``idx`` in ``ts2``
       is computed against ``ts1`` and passed to ``self._elementwise_min``.
    2. ``jdx`` is taken as the argmin of that profile. The diagonal through
       ``(idx, jdx)`` is then followed forwards (at most ``sample_interval``
       positions) and, unless ``idx`` or ``jdx`` is 0, backwards (at most
       ``sample_interval`` positions). Wherever the diagonal distance is
       smaller than the stored value, the matrix profile and index profile
       are updated in place; in a self-join the symmetric entries are
       updated as well.

    A ``KeyboardInterrupt`` stops the computation early without
    propagating; when ``self.verbose`` is set, a notice with the last
    iteration number is written through ``tqdm.write``.
    """
    # Bind the counter before the try block: the rolling statistics below
    # run before the loop, so an interrupt during them would otherwise leave
    # the handler with an unbound name.
    n_iter = 0
    try:
        mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size)
        if self._same_ts:
            # Self-join: both series share the same rolling statistics.
            mu_Q, sigma_Q = mu_T, sigma_T
        else:
            mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size)

        for n_iter, idx in enumerate(self._iterator):
            D = utils.mass(self.ts2[idx: idx+self.window_size], self.ts1)
            self._elementwise_min(D, idx)

            jdx = np.argmin(D)  # the index of closest profile to the current idx

            # compute diagonals until the next sampled point
            q1 = self.ts2[idx:idx + self.sample_interval + self.window_size - 1]
            q2 = self.ts1[jdx:jdx + self.sample_interval + self.window_size - 1]
            lq = min(len(q1), len(q2))
            q = q1[:lq] * q2[:lq]
            # presumably turns the pointwise products into sliding dot
            # products, one per window position -- confirm in utils
            q = utils.rolling_sum(q, self.window_size)

            # Positions covered by this diagonal segment in each series.
            q_span = slice(idx, idx + len(q))
            t_span = slice(jdx, jdx + len(q))
            D = utils.calculate_distance_profile(q, self.window_size,
                                                 mu_Q[q_span], sigma_Q[q_span],
                                                 mu_T[t_span], sigma_T[t_span])

            # The index profile must be updated before the matrix profile,
            # because the comparison uses the not-yet-updated distances.
            self._index_profile[t_span] = np.where(D < self._matrix_profile[t_span],
                                                   np.arange(idx, idx + len(q)),
                                                   self._index_profile[t_span])
            self._matrix_profile[t_span] = np.minimum(D, self._matrix_profile[t_span])

            if self._same_ts:
                # Self-join: the same distances also apply with the roles
                # of idx and jdx swapped.
                self._index_profile[q_span] = np.where(D < self._matrix_profile[q_span],
                                                       np.arange(jdx, jdx + len(q)),
                                                       self._index_profile[q_span])
                self._matrix_profile[q_span] = np.minimum(D, self._matrix_profile[q_span])

            # compute diagonals until the previous sampled point
            if idx != 0 and jdx != 0:
                q1 = self.ts2[max(0, idx - self.sample_interval):(idx + self.window_size - 1)]
                q2 = self.ts1[max(0, jdx - self.sample_interval):(jdx + self.window_size - 1)]
                lq = min(len(q1), len(q2))
                # Align on the trailing end so both segments finish just
                # before the (idx, jdx) window.
                q = q1[-lq:] * q2[-lq:]
                q = utils.rolling_sum(q, self.window_size)

                q_span = slice(idx - len(q), idx)
                t_span = slice(jdx - len(q), jdx)
                D = utils.calculate_distance_profile(q, self.window_size,
                                                     mu_Q[q_span], sigma_Q[q_span],
                                                     mu_T[t_span], sigma_T[t_span])

                self._index_profile[t_span] = np.where(D < self._matrix_profile[t_span],
                                                       np.arange(idx - len(q), idx),
                                                       self._index_profile[t_span])
                self._matrix_profile[t_span] = np.minimum(D, self._matrix_profile[t_span])

                if self._same_ts:
                    self._index_profile[q_span] = np.where(D < self._matrix_profile[q_span],
                                                           np.arange(jdx - len(q), jdx),
                                                           self._index_profile[q_span])
                    self._matrix_profile[q_span] = np.minimum(D, self._matrix_profile[q_span])
    except KeyboardInterrupt:
        if self.verbose:
            tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
def update_ts2(self, pt):
    """
    Append one data point to the end of ts2 and refresh the profiles.

    In a self-join (ts1 == ts2) the call is delegated to ``update_ts1``,
    which updates both series.

    :param float pt: The value of the new data point, to be attached to the end of ts2.
    """
    if self._same_ts:
        # Self-join: update_ts1 takes care of both series.
        self.update_ts1(pt)
        return

    self.ts2 = np.append(self.ts2, pt)

    # The new point creates exactly one new window: the trailing one.
    window = self.window_size
    newest_window = self.ts2[-window:]
    newest_idx = len(self.ts2) - window

    distances = utils.mass(newest_window, self.ts1)
    self._elementwise_min(distances, newest_idx)
def update_ts1(self, pt):
    """
    Append one data point to the end of ts1 and refresh the profiles.

    In a self-join (ts1 == ts2) ts2 is replaced by a copy of the extended
    ts1, so both series are updated.

    :param float pt: The value of the new data point, to be attached to the end of ts1.
    """
    self.ts1 = np.append(self.ts1, pt)
    if self._same_ts:
        self.ts2 = np.copy(self.ts1)

    # The new point creates exactly one new window: the trailing one.
    window = self.window_size
    new_idx = len(self.ts1) - window
    dist = utils.mass(self.ts1[-window:], self.ts2)

    if self._same_ts:
        # Mask out windows within the exclusion zone around the new window.
        ez_start = max(0, new_idx - self.exclusion_zone)
        ez_stop = min(len(self.ts2), new_idx + self.exclusion_zone) + 1
        dist[ez_start:ez_stop] = np.inf

        # Existing entries that are closer to the new window than to their
        # stored match now point at the new window. The index profile is
        # updated first because the comparison needs the old distances.
        improved = self._matrix_profile > dist[:-1]
        self._index_profile[improved] = new_idx
        self._matrix_profile = np.minimum(self._matrix_profile, dist[:-1])

    # Extend both profiles with the entry for the new window itself.
    best = np.argmin(dist)
    self._index_profile = np.append(self._index_profile, best)
    self._matrix_profile = np.append(self._matrix_profile, dist[best])
def test_mass_query_too_long(self):
    """
    Check that ``utils.mass`` rejects a query longer than the series.

    A 1000-point query against a 200-point series must raise ``ValueError``.
    """
    q = np.random.rand(1000)
    t = np.random.rand(200)

    # Only the call under test sits inside the context manager, so a
    # ValueError raised while building the inputs cannot satisfy the check.
    with pytest.raises(ValueError):
        utils.mass(q, t)