data = datasets.loadDatasets(dataset, 'interval') # Try different thresholds for interval proposing results = OrderedDict() for propmeth in PROPMETHODS: results[propmeth] = OrderedDict() for sd_th in THS: ygts = [] regions = [] for ftype in data: for func in data[ftype]: ygts.append(func['gt']) ts = preproc.normalize_time_series(func['ts']) if td_dim > 1: ts = preproc.td(ts, td_dim, td_lag) regions.append(list(pointwiseRegionProposals(ts, method = propmeth, sd_th = sd_th, extint_min_len = 10, extint_max_len = extint_max_len))) results[propmeth][sd_th] = eval.recall_precision(ygts, regions, multiAsFP = False) # Print results as table labels = ('Recall', 'Precision', 'F1-Score') hdiv_len = 5 + sum(len(lbl) + 3 for lbl in labels) # length of horizontal divider for propmeth, res in results.items(): print('\n-- {} --\n'.format(propmeth)) print(' |' + '|'.join(' {} '.format(lbl) for lbl in labels)) print('{:-<{}s}'.format('', hdiv_len))
# --- Tail of a usage/help branch; the opening `if` is outside this excerpt ---
    print('Testing on synthetic data: {} <method = gaussian_cov> <dataset> <td-embed = 1>'.format(sys.argv[0]))
    print('Methods: gaussian_cov, gaussian_cov_ts, gaussian_global_cov, gaussian_id_cov, gaussian_id_cov_normalized, parzen, compare')
    exit()

# 'compare' runs a fixed selection of methods side by side,
# otherwise only the single requested method.
methods = ['parzen', 'gaussian_id_cov', 'gaussian_cov', 'gaussian_cov_ts'] if method == 'compare' else [method]

if dataset == 'noise':
    # Sample a time series consisting of pure white noise
    np.random.seed(0)
    ts = np.random.randn(dim, n)
else:
    import datasets
    ts = datasets.loadSyntheticTestbench()[dataset][0]['ts']

# Retrieve scores for all intervals
pts = preproc.td(ts, td_embed) if td_embed > 1 else ts
scores = dict()
for meth in methods:
    proposals = maxdiv.denseRegionProposals(pts, extint_min_len, extint_max_len)
    if meth == 'gaussian_id_cov_normalized':
        norm_scores = maxdiv.maxdiv_gaussian(pts, proposals, mode = 'I_OMEGA', gaussian_mode = 'ID_COV')
        # Compute theoretical means and standard deviations of the chi^2 distributions
        # (one entry per possible interval length from extint_min_len to extint_max_len).
        X = np.arange(extint_min_len, extint_max_len + 1)
        scales = 1.0 / X - 1.0 / (pts.shape[1] - X)
        chi_mean = pts.shape[0] * scales
        chi_sd = np.sqrt(2 * pts.shape[0] * (scales ** 2))
        # Normalize scores
        # Each score is standardized by the mean/sd belonging to its
        # interval length (b - a); indexed relative to extint_min_len.
        for i, (a, b, score) in enumerate(norm_scores):
            ind = b - a - extint_min_len
            norm_scores[i] = (a, b, (score - chi_mean[ind]) / chi_sd[ind])
        # Add a constant offset to avoid negative scores
if __name__ == '__main__':
    import sys
    # CLI: <method> [<proposal-generator>]; defaults apply when arguments are absent.
    method = sys.argv[1] if len(sys.argv) > 1 else 'gaussian_cov_ts'
    propmeth = sys.argv[2] if len(sys.argv) > 2 else 'dense'

    # Load data
    # (project helper; presumably returns a multivariate series plus timestamps)
    data, dates = read_hpw_csv('HPW_2012_41046.csv')
    data = preproc.normalize_time_series(data)

    # Detect
    if method in ['hotellings_t', 'kde']:
        # Point-wise baselines: score every single time step, then merge
        # high-scoring points into intervals (window of 24 steps — presumably
        # one day of hourly data; TODO confirm against read_hpw_csv).
        if method == 'kde':
            scores = baselines_noninterval.pointwiseKDE(preproc.td(data))
        else:
            scores = baselines_noninterval.hotellings_t(preproc.td(data))
        regions = baselines_noninterval.pointwiseScoresToIntervals(scores, 24)
    elif method == 'gaussian_cov_ts':
        regions = maxdiv.maxdiv(data, 'gaussian_cov', mode='TS', td_dim=3, td_lag=1, proposals=propmeth, extint_min_len=24, extint_max_len=72, num_intervals=5)
    else:
        # NOTE(review): this call is truncated in the visible excerpt — the
        # remaining arguments lie outside this chunk.
        regions = maxdiv.maxdiv(data,
else: dataset = 'synthetic' try: data = datasets.loadDatasets(dataset) except: print('Unknown dataset: {}'.format(dataset)) exit() if ftype not in data: print('Unknown extreme type: {}'.format(ftype)) exit() # Detect and plot anomaly boundaries for func in data[ftype]: # Compute scores for each point scores = METHODS[method](preproc.td(func['ts'], embed_dim)) # Score statistics score_mean = np.mean(scores) score_sd = np.std(scores) score_median = np.median(scores) score_mad = 1.4826 * np.median(np.abs(scores - score_median)) # Compute gradient of scores pad = (len(GRAD_FILTER) - 1) // 2 padded_scores = np.concatenate((scores[:pad], scores, scores[-pad:])) score_gradient = np.abs( np.convolve(padded_scores, GRAD_FILTER, 'valid')) score_gradient_mean = np.mean(score_gradient) score_gradient_sd = np.std(score_gradient) score_gradient_median = np.median(score_gradient)