Пример #1
0
# Load interval-annotated test data for the requested dataset.
# NOTE(review): `datasets`, `preproc`, `pointwiseRegionProposals`, `eval`,
# and the constants PROPMETHODS, THS, td_dim, td_lag, extint_max_len are
# defined elsewhere in this script — not visible in this chunk.
data = datasets.loadDatasets(dataset, 'interval')

# Try different thresholds for interval proposing
results = OrderedDict()
for propmeth in PROPMETHODS:
    results[propmeth] = OrderedDict()
    for sd_th in THS:
        ygts = []     # ground-truth annotations, one entry per function
        regions = []  # proposed regions, index-aligned with ygts
        
        for ftype in data:
            for func in data[ftype]:
                ygts.append(func['gt'])
                # Normalize the time series, then optionally apply
                # time-delay embedding before proposing regions.
                ts = preproc.normalize_time_series(func['ts'])
                if td_dim > 1:
                    ts = preproc.td(ts, td_dim, td_lag)
                regions.append(list(pointwiseRegionProposals(ts, method = propmeth, sd_th = sd_th,
                                                             extint_min_len = 10, extint_max_len = extint_max_len)))
            
        # Score this (proposal method, threshold) combination against the
        # ground truth. NOTE(review): `eval` is presumably a project module
        # that shadows the builtin — confirm and consider renaming.
        results[propmeth][sd_th] = eval.recall_precision(ygts, regions, multiAsFP = False)


# Print results as table
labels = ('Recall', 'Precision', 'F1-Score')
hdiv_len = 5 + sum(len(lbl) + 3 for lbl in labels) # length of horizontal divider

for propmeth, res in results.items():
    print('\n-- {} --\n'.format(propmeth))

    # Header row ("     |" leaves room for the row label column),
    # followed by a horizontal divider of '-' characters.
    print('     |' + '|'.join(' {} '.format(lbl) for lbl in labels))
    print('{:-<{}s}'.format('', hdiv_len))
Пример #2
0
    # Usage/help message; the enclosing `if` header (argument check) is
    # above this chunk and not visible here.
    print('Testing on synthetic data:  {} <method = gaussian_cov> <dataset> <td-embed = 1>'.format(sys.argv[0]))
    print('Methods: gaussian_cov, gaussian_cov_ts, gaussian_global_cov, gaussian_id_cov, gaussian_id_cov_normalized, parzen, compare')
    exit()

# 'compare' runs a fixed selection of methods; otherwise only the one requested.
methods = ['parzen', 'gaussian_id_cov', 'gaussian_cov', 'gaussian_cov_ts'] if method == 'compare' else [method]

if dataset == 'noise':
    # Sample a time series consisting of pure white noise
    np.random.seed(0)  # fixed seed so the noise series is reproducible
    ts = np.random.randn(dim, n)
else:
    import datasets
    ts = datasets.loadSyntheticTestbench()[dataset][0]['ts']

# Retrieve scores for all intervals
# Optional time-delay embedding before scoring (td_embed defined elsewhere).
pts = preproc.td(ts, td_embed) if td_embed > 1 else ts
scores = dict()
for meth in methods:
    proposals = maxdiv.denseRegionProposals(pts, extint_min_len, extint_max_len)
    if meth == 'gaussian_id_cov_normalized':
        norm_scores = maxdiv.maxdiv_gaussian(pts, proposals, mode = 'I_OMEGA', gaussian_mode = 'ID_COV')
        # Compute theoretical means and standard deviations of the chi^2 distributions
        X = np.arange(extint_min_len, extint_max_len + 1)  # all possible interval lengths
        scales = 1.0 / X - 1.0 / (pts.shape[1] - X)
        chi_mean = pts.shape[0] * scales
        chi_sd = np.sqrt(2 * pts.shape[0] * (scales ** 2))
        # Normalize scores: turn each raw (a, b, score) triple into a
        # z-score relative to the stats for its interval length b - a.
        for i, (a, b, score) in enumerate(norm_scores):
            ind = b - a - extint_min_len  # index into the per-length statistics
            norm_scores[i] = (a, b, (score - chi_mean[ind]) / chi_sd[ind])
        # Add a constant offset to avoid negative scores
Пример #3
0

if __name__ == '__main__':

    # CLI: argv[1] = detection method (default 'gaussian_cov_ts'),
    #      argv[2] = interval proposal method (default 'dense').
    import sys
    method = sys.argv[1] if len(sys.argv) > 1 else 'gaussian_cov_ts'
    propmeth = sys.argv[2] if len(sys.argv) > 2 else 'dense'

    # Load data
    # NOTE(review): read_hpw_csv, preproc, baselines_noninterval and maxdiv
    # are defined/imported elsewhere in this file — not visible here.
    data, dates = read_hpw_csv('HPW_2012_41046.csv')
    data = preproc.normalize_time_series(data)

    # Detect
    if method in ['hotellings_t', 'kde']:
        # Pointwise baselines: score each time step, then merge
        # high-scoring points into intervals (max length 24 here —
        # presumably 24 hours; confirm against the data's sampling rate).
        if method == 'kde':
            scores = baselines_noninterval.pointwiseKDE(preproc.td(data))
        else:
            scores = baselines_noninterval.hotellings_t(preproc.td(data))
        regions = baselines_noninterval.pointwiseScoresToIntervals(scores, 24)
    elif method == 'gaussian_cov_ts':
        # Interval-based detection with fixed time-delay embedding
        # (td_dim=3, td_lag=1) and interval lengths between 24 and 72.
        regions = maxdiv.maxdiv(data,
                                'gaussian_cov',
                                mode='TS',
                                td_dim=3,
                                td_lag=1,
                                proposals=propmeth,
                                extint_min_len=24,
                                extint_max_len=72,
                                num_intervals=5)
    else:
        # Fallback branch; the call continues past this chunk.
        regions = maxdiv.maxdiv(data,
Пример #4
0
    # The matching `if` for this `else` is above this chunk and not visible.
    else:
        dataset = 'synthetic'
    try:
        data = datasets.loadDatasets(dataset)
    except:
        # NOTE(review): bare except hides the real failure reason; a
        # narrower exception type would be preferable here.
        print('Unknown dataset: {}'.format(dataset))
        exit()
    if ftype not in data:
        print('Unknown extreme type: {}'.format(ftype))
        exit()

    # Detect and plot anomaly boundaries
    for func in data[ftype]:

        # Compute scores for each point
        # NOTE(review): METHODS maps method names to scoring callables and
        # GRAD_FILTER is a 1-D convolution kernel — both defined elsewhere.
        scores = METHODS[method](preproc.td(func['ts'], embed_dim))

        # Score statistics
        score_mean = np.mean(scores)
        score_sd = np.std(scores)
        score_median = np.median(scores)
        # 1.4826 scales the MAD to be a consistent estimator of the
        # standard deviation under a normal distribution.
        score_mad = 1.4826 * np.median(np.abs(scores - score_median))

        # Compute gradient of scores
        # Pad symmetrically with edge values so the 'valid' convolution
        # yields one gradient value per original score.
        pad = (len(GRAD_FILTER) - 1) // 2
        padded_scores = np.concatenate((scores[:pad], scores, scores[-pad:]))
        score_gradient = np.abs(
            np.convolve(padded_scores, GRAD_FILTER, 'valid'))
        score_gradient_mean = np.mean(score_gradient)
        score_gradient_sd = np.std(score_gradient)
        score_gradient_median = np.median(score_gradient)