                    default=datasets.TYPES)
parser.add_argument('--ignore_missing',
                    help='Ignore functions from the dataset for which no detections are present',
                    action='store_true')
parser.add_argument('--num_samples',
                    help='Number of samples for permutation and bootstrap test',
                    type=int, default=1000)
args = parser.parse_args()

# Load dataset
data = datasets.loadDatasets(args.datasets, args.extremetypes)

# Load detections
det1 = loadDetectionDump(args.dump1)
det2 = loadDetectionDump(args.dump2)

# Associate detections with each other and with ground-truth
det = alignDetections(det1, det2, data, not args.ignore_missing, quiet=False)

# Run significance tests and print results
results = testSignificance(det, args.num_samples, printProgress=True)
printSignificanceResults(results)
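# Illustration (hypothetical sketch, not part of the script above): the core of
# a paired permutation test of the kind testSignificance presumably runs. The
# real function operates on aligned detections and also performs a bootstrap
# test, so its details will differ; this only demonstrates the resampling idea.
import numpy as np

def paired_permutation_test(scores1, scores2, num_samples=1000, seed=0):
    # Observed mean difference between the paired per-series scores
    diffs = np.asarray(scores1, dtype=float) - np.asarray(scores2, dtype=float)
    observed = abs(diffs.mean())
    rng = np.random.RandomState(seed)
    count = 0
    for _ in range(num_samples):
        signs = rng.choice([-1.0, 1.0], size=len(diffs))  # randomly swap pairs
        if abs((signs * diffs).mean()) >= observed:
            count += 1
    return (count + 1) / (num_samples + 1)  # add-one smoothing avoids p = 0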
parser.add_argument('--subsets', help='subsets of the datasets to be tested',
                    nargs='+', default=[])
parser.add_argument('--td_lag', help='Time-Lag for Time-Delay Embedding',
                    default=1, type=int)
parser.add_argument('--dump',
                    help='Dump detections for each time-series to the specified CSV file',
                    default='')
args = parser.parse_args()

# Load data
data = datasets.loadDatasets(args.datasets)
ftypes = args.subsets if len(args.subsets) > 0 else data.keys()

# Find the best embedding dimension for every single time series
aucs = {}
aps = {}
all_ids = []
all_gt = []
all_regions = []
best_k = {}
for ftype in ftypes:
    print('-- {} --'.format(ftype))
    func_ids = []
    ygts = []
    regions = []
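# Illustration (hypothetical sketch): the time-delay embedding controlled by
# --td_lag. The repository's actual implementation is preproc.td, which also
# handles multivariate series; this sketch assumes a 1-d input.
import numpy as np

def time_delay_embedding(x, k, lag):
    # Stack k delayed copies of x, each shifted by `lag` steps, so that every
    # column of the result is one k-dimensional embedded vector.
    x = np.asarray(x, dtype=float)
    n = len(x) - (k - 1) * lag
    return np.stack([x[i * lag : i * lag + n] for i in range(k)])

# Example: dimension 3, lag 2 on a series of length 10 -> shape (3, 6)
print(time_delay_embedding(np.arange(10), k=3, lag=2).shape)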
PROPOSALS = sys.argv[2] if len(sys.argv) > 2 else 'dense'
TD_DIM = int(sys.argv[3]) if len(sys.argv) > 3 else 6
TD_LAG = int(sys.argv[4]) if len(sys.argv) > 4 else 2

if PROPOSALS == 'help':
    print('Test several deseasonalization methods in combination with different '
          'variants of the MDI algorithm on a given dataset.\n')
    print('Usage: {} <dataset = yahoo | synthetic> <proposals = dense> <td-dim = 6> <td-lag = 2>'.format(sys.argv[0]))
    exit()

# Load test data
if DATASET == 'synthetic':
    data = datasets.loadDatasets('synthetic_seasonal')['diurnal']
else:
    data = datasets.loadDatasets('yahoo_real')['A1Benchmark']

# Check libmaxdiv
if libmaxdiv_wrapper.libmaxdiv is None:
    raise RuntimeError('libmaxdiv could not be found and loaded.')

# Compile pipelines
pipelines = OrderedDict()
pipelines['none'] = []
pipelines['OLS'] = []
pipelines['Z-Score'] = []

params = libmaxdiv_wrapper.maxdiv_params_t()
libmaxdiv_wrapper.libmaxdiv.maxdiv_init_params(params)
params.min_size[0] = 20 if DATASET == 'synthetic' else 10
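# Illustration (hypothetical sketch): the idea behind the 'Z-Score'
# deseasonalization pipeline, assuming a known seasonal period. The actual
# pipeline is assembled from libmaxdiv preprocessing stages and may differ.
import numpy as np

def zscore_deseasonalize(x, period):
    # Standardize each sample against the mean/std of its own seasonal phase,
    # i.e. x[p], x[p+period], x[p+2*period], ... share one set of statistics.
    x = np.asarray(x, dtype=float).copy()
    for p in range(period):
        phase = x[p::period]
        std = phase.std()
        x[p::period] = (phase - phase.mean()) / (std if std > 0 else 1.0)
    return x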
import sys
from collections import OrderedDict

import numpy as np

from maxdiv import preproc, eval
from maxdiv.baselines_noninterval import pointwiseRegionProposals
import datasets

# Constants
PROPMETHODS = ['hotellings_t', 'kde']
THS = np.concatenate((np.linspace(0, 2, 20, endpoint=False),
                      np.linspace(2, 4, 9, endpoint=True)))

# Parse parameters
dataset = sys.argv[1] if len(sys.argv) > 1 else 'synthetic'
extint_max_len = max(10, int(sys.argv[2])) if len(sys.argv) > 2 else 100
td_dim = max(1, int(sys.argv[3])) if len(sys.argv) > 3 else 1
td_lag = max(1, int(sys.argv[4])) if len(sys.argv) > 4 else 1

# Load test data
data = datasets.loadDatasets(dataset, 'interval')

# Try different thresholds for interval proposing
results = OrderedDict()
for propmeth in PROPMETHODS:
    results[propmeth] = OrderedDict()
    for sd_th in THS:
        ygts = []
        regions = []
        for ftype in data:
            for func in data[ftype]:
                ygts.append(func['gt'])
                ts = preproc.normalize_time_series(func['ts'])
                if td_dim > 1:
                    ts = preproc.td(ts, td_dim, td_lag)
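# Illustration (hypothetical sketch): how a threshold sd_th can turn pointwise
# anomaly scores into interval proposals. The repository's actual logic lives
# in pointwiseRegionProposals; this only shows the thresholding idea.
import numpy as np

def propose_intervals(scores, sd_th):
    scores = np.asarray(scores, dtype=float)
    mask = scores > scores.mean() + sd_th * scores.std()
    intervals, start = [], None
    for i, flagged in enumerate(mask):
        if flagged and start is None:
            start = i                     # a run of flagged points begins
        elif not flagged and start is not None:
            intervals.append((start, i))  # half-open interval [start, i)
            start = None
    if start is not None:
        intervals.append((start, len(mask)))
    return intervals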
                    action='store_true')
args = parser.parse_args()

# Prepare parameters for calling maxdiv
args_dict = vars(args)
parameters = {parameter_name: args_dict[parameter_name]
              for parameter_name in cli_tools.get_algorithm_parameters()
              if (parameter_name in args_dict) and (parameter_name != 'td_dim')}
if ('num_intervals' in parameters) and (parameters['num_intervals'] <= 0):
    parameters['num_intervals'] = None

# Load synthetic test data
data = datasets.loadDatasets('synthetic')

# Determine set of extreme types to run tests for
extremetypes = set(args.extremetypes) & set(data.keys())
if len(extremetypes) == 0:
    extremetypes = data.keys()

# Try all combinations of preprocessing methods, density estimators and divergence modes
# for all types of extremes and store the results in dictionaries
auc = {}      # Area under ROC curve
auc_sd = {}   # Standard deviation of AUC scores
aps = {}      # Average Precision
times = {}    # Lists of runtimes for time series of different length and each method
labels = {}   # Labels for the different combinations
all_gt = {}
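# Illustration (hypothetical sketch): computing the threshold-independent
# metrics collected in `auc` and `aps` from pointwise scores and binary ground
# truth. scikit-learn is used here purely for brevity and is an assumption;
# the repository has its own evaluation code in maxdiv.eval.
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

gt = np.array([0, 0, 1, 1, 0, 0, 1, 0])                      # made-up labels
scores = np.array([0.1, 0.2, 0.9, 0.7, 0.3, 0.1, 0.6, 0.2])  # made-up scores
print('AUC: {:.3f}'.format(roc_auc_score(gt, scores)))
print('AP:  {:.3f}'.format(average_precision_score(gt, scores)))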
PROPMETHODS = ['hotellings_t', 'kde']
METHOD = 'gaussian_cov'
MODE = 'CROSSENT'
MAD = [True, False]
FILTERED = [True, False]
THS = np.concatenate((np.linspace(0, 2, 20, endpoint=False),
                      np.linspace(2, 4, 9, endpoint=True)))

propmeth = sys.argv[1] if (len(sys.argv) > 1) and (sys.argv[1] in PROPMETHODS) else PROPMETHODS[0]
dataset = sys.argv[2] if len(sys.argv) > 2 else 'synthetic'

# Load test data
data = datasets.loadDatasets(dataset)

# Try different parameter combinations for interval proposing
ap = OrderedDict()       # Average Precision
mean_ap = OrderedDict()  # Mean Average Precision
labels = OrderedDict()   # Labels for the different combinations
for filtered in FILTERED:
    for useMAD in MAD:
        id = (filtered, useMAD)
        ap[id] = {}
        mean_ap[id] = {}
        labels[id] = '{}, {}'.format('median' if useMAD else 'mean',
                                     'gradients' if filtered else 'scores')
        print('Testing {}'.format(labels[id]))
        sys.stdout.flush()
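# Illustration (hypothetical sketch): the difference the useMAD flag makes when
# thresholding scores. The MAD is scaled by 1.4826 so that it is a consistent
# estimator of the standard deviation under Gaussian data.
import numpy as np

def robust_threshold(scores, sd_th, use_mad=True):
    # location + sd_th * scale, with robust (median/MAD) or classical
    # (mean/std) estimators of location and scale
    scores = np.asarray(scores, dtype=float)
    if use_mad:
        loc = np.median(scores)
        scale = 1.4826 * np.median(np.abs(scores - loc))
    else:
        loc, scale = scores.mean(), scores.std()
    return loc + sd_th * scale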