                    default=datasets.TYPES)
parser.add_argument('--ignore_missing',
                    help='Ignore functions from the dataset for which no detections are present',
                    action='store_true')
parser.add_argument('--num_samples',
                    help='Number of samples for permutation and bootstrap test',
                    type=int, default=1000)
args = parser.parse_args()

# Load dataset
data = datasets.loadDatasets(args.datasets, args.extremetypes)

# Load detections
det1 = loadDetectionDump(args.dump1)
det2 = loadDetectionDump(args.dump2)

# Associate detections with each other and with ground-truth
det = alignDetections(det1, det2, data, not args.ignore_missing, quiet=False)

# Run significance tests and print results
results = testSignificance(det, args.num_samples, printProgress=True)
printSignificanceResults(results)
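# Illustration (hypothetical sketch, not part of the script above): the core of
# a paired permutation test of the kind testSignificance presumably runs. The
# real function operates on aligned detections and also performs a bootstrap
# test, so its details will differ; this only demonstrates the resampling idea.
import numpy as np

def paired_permutation_test(scores1, scores2, num_samples=1000, seed=0):
    # Observed mean difference between the paired per-series scores
    diffs = np.asarray(scores1, dtype=float) - np.asarray(scores2, dtype=float)
    observed = abs(diffs.mean())
    rng = np.random.RandomState(seed)
    count = 0
    for _ in range(num_samples):
        signs = rng.choice([-1.0, 1.0], size=len(diffs))  # randomly swap pairs
        if abs((signs * diffs).mean()) >= observed:
            count += 1
    return (count + 1) / (num_samples + 1)  # add-one smoothing avoids p = 0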
parser.add_argument('--subsets', help='subsets of the datasets to be tested',
                    nargs='+', default=[])
parser.add_argument('--td_lag', help='Time-Lag for Time-Delay Embedding',
                    default=1, type=int)
parser.add_argument('--dump',
                    help='Dump detections for each time-series to the specified CSV file',
                    default='')
args = parser.parse_args()

# Load data
data = datasets.loadDatasets(args.datasets)
ftypes = args.subsets if len(args.subsets) > 0 else data.keys()

# Find the best embedding dimension for every single time series
aucs = {}
aps = {}
all_ids = []
all_gt = []
all_regions = []
best_k = {}
for ftype in ftypes:
    print('-- {} --'.format(ftype))
    func_ids = []
    ygts = []
    regions = []
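# Illustration (hypothetical sketch): the time-delay embedding controlled by
# --td_lag. The repository's actual implementation is preproc.td, which also
# handles multivariate series; this sketch assumes a 1-d input.
import numpy as np

def time_delay_embedding(x, k, lag):
    # Stack k delayed copies of x, each shifted by `lag` steps, so that every
    # column of the result is one k-dimensional embedded vector.
    x = np.asarray(x, dtype=float)
    n = len(x) - (k - 1) * lag
    return np.stack([x[i * lag : i * lag + n] for i in range(k)])

# Example: dimension 3, lag 2 on a series of length 10 -> shape (3, 6)
print(time_delay_embedding(np.arange(10), k=3, lag=2).shape)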
PROPOSALS = sys.argv[2] if len(sys.argv) > 2 else 'dense'
TD_DIM = int(sys.argv[3]) if len(sys.argv) > 3 else 6
TD_LAG = int(sys.argv[4]) if len(sys.argv) > 4 else 2

if PROPOSALS == 'help':
    print('Test several deseasonalization methods in combination with different '
          'variants of the MDI algorithm on a given dataset.\n')
    print('Usage: {} <dataset = yahoo | synthetic> <proposals = dense> <td-dim = 6> <td-lag = 2>'.format(sys.argv[0]))
    exit()

# Load test data
if DATASET == 'synthetic':
    data = datasets.loadDatasets('synthetic_seasonal')['diurnal']
else:
    data = datasets.loadDatasets('yahoo_real')['A1Benchmark']

# Check libmaxdiv
if libmaxdiv_wrapper.libmaxdiv is None:
    raise RuntimeError('libmaxdiv could not be found and loaded.')

# Compile pipelines
pipelines = OrderedDict()
pipelines['none'] = []
pipelines['OLS'] = []
pipelines['Z-Score'] = []

params = libmaxdiv_wrapper.maxdiv_params_t()
libmaxdiv_wrapper.libmaxdiv.maxdiv_init_params(params)
params.min_size[0] = 20 if DATASET == 'synthetic' else 10
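# Illustration (hypothetical sketch): the idea behind the 'Z-Score'
# deseasonalization pipeline, assuming a known seasonal period. The actual
# pipeline is assembled from libmaxdiv preprocessing stages and may differ.
import numpy as np

def zscore_deseasonalize(x, period):
    # Standardize each sample against the mean/std of its own seasonal phase,
    # i.e. x[p], x[p+period], x[p+2*period], ... share one set of statistics.
    x = np.asarray(x, dtype=float).copy()
    for p in range(period):
        phase = x[p::period]
        std = phase.std()
        x[p::period] = (phase - phase.mean()) / (std if std > 0 else 1.0)
    return x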
import sys
from collections import OrderedDict

import numpy as np

from maxdiv import preproc, eval
from maxdiv.baselines_noninterval import pointwiseRegionProposals
import datasets

# Constants
PROPMETHODS = ['hotellings_t', 'kde']
THS = np.concatenate((np.linspace(0, 2, 20, endpoint=False),
                      np.linspace(2, 4, 9, endpoint=True)))

# Parse parameters
dataset = sys.argv[1] if len(sys.argv) > 1 else 'synthetic'
extint_max_len = max(10, int(sys.argv[2])) if len(sys.argv) > 2 else 100
td_dim = max(1, int(sys.argv[3])) if len(sys.argv) > 3 else 1
td_lag = max(1, int(sys.argv[4])) if len(sys.argv) > 4 else 1

# Load test data
data = datasets.loadDatasets(dataset, 'interval')

# Try different thresholds for interval proposing
results = OrderedDict()
for propmeth in PROPMETHODS:
    results[propmeth] = OrderedDict()
    for sd_th in THS:
        ygts = []
        regions = []
        for ftype in data:
            for func in data[ftype]:
                ygts.append(func['gt'])
                ts = preproc.normalize_time_series(func['ts'])
                if td_dim > 1:
                    ts = preproc.td(ts, td_dim, td_lag)
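# Illustration (hypothetical sketch): how a threshold sd_th can turn pointwise
# anomaly scores into interval proposals. The repository's actual logic lives
# in pointwiseRegionProposals; this only shows the thresholding idea.
import numpy as np

def propose_intervals(scores, sd_th):
    scores = np.asarray(scores, dtype=float)
    mask = scores > scores.mean() + sd_th * scores.std()
    intervals, start = [], None
    for i, flagged in enumerate(mask):
        if flagged and start is None:
            start = i                     # a run of flagged points begins
        elif not flagged and start is not None:
            intervals.append((start, i))  # half-open interval [start, i)
            start = None
    if start is not None:
        intervals.append((start, len(mask)))
    return intervals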
                    action='store_true')
args = parser.parse_args()

# Prepare parameters for calling maxdiv
args_dict = vars(args)
parameters = {parameter_name: args_dict[parameter_name]
              for parameter_name in cli_tools.get_algorithm_parameters()
              if (parameter_name in args_dict) and (parameter_name != 'td_dim')}
if ('num_intervals' in parameters) and (parameters['num_intervals'] <= 0):
    parameters['num_intervals'] = None

# Load synthetic test data
data = datasets.loadDatasets('synthetic')

# Determine set of extreme types to run tests for
extremetypes = set(args.extremetypes) & set(data.keys())
if len(extremetypes) == 0:
    extremetypes = data.keys()

# Try all combinations of preprocessing methods, density estimators and divergence modes
# for all types of extremes and store the results in dictionaries
auc = {}      # Area under ROC curve
auc_sd = {}   # Standard deviation of AUC scores
aps = {}      # Average Precision
times = {}    # Lists of runtimes for time series of different length and each method
labels = {}   # Labels for the different combinations
all_gt = {}
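# Illustration (hypothetical sketch): computing the threshold-independent
# metrics collected in `auc` and `aps` from pointwise scores and binary ground
# truth. scikit-learn is used here purely for brevity and is an assumption;
# the repository has its own evaluation code in maxdiv.eval.
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

gt = np.array([0, 0, 1, 1, 0, 0, 1, 0])                      # made-up labels
scores = np.array([0.1, 0.2, 0.9, 0.7, 0.3, 0.1, 0.6, 0.2])  # made-up scores
print('AUC: {:.3f}'.format(roc_auc_score(gt, scores)))
print('AP:  {:.3f}'.format(average_precision_score(gt, scores)))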
PROPMETHODS = ['hotellings_t', 'kde']
METHOD = 'gaussian_cov'
MODE = 'CROSSENT'
MAD = [True, False]
FILTERED = [True, False]
THS = np.concatenate((np.linspace(0, 2, 20, endpoint=False),
                      np.linspace(2, 4, 9, endpoint=True)))

propmeth = sys.argv[1] if (len(sys.argv) > 1) and (sys.argv[1] in PROPMETHODS) else PROPMETHODS[0]
dataset = sys.argv[2] if len(sys.argv) > 2 else 'synthetic'

# Load test data
data = datasets.loadDatasets(dataset)

# Try different parameter combinations for interval proposing
ap = OrderedDict()       # Average Precision
mean_ap = OrderedDict()  # Mean Average Precision
labels = OrderedDict()   # Labels for the different combinations
for filtered in FILTERED:
    for useMAD in MAD:
        id = (filtered, useMAD)
        ap[id] = {}
        mean_ap[id] = {}
        labels[id] = '{}, {}'.format('median' if useMAD else 'mean',
                                     'gradients' if filtered else 'scores')
        print('Testing {}'.format(labels[id]))
        sys.stdout.flush()
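# Illustration (hypothetical sketch): the difference the useMAD flag makes when
# thresholding scores. The MAD is scaled by 1.4826 so that it is a consistent
# estimator of the standard deviation under Gaussian data.
import numpy as np

def robust_threshold(scores, sd_th, use_mad=True):
    # location + sd_th * scale, with robust (median/MAD) or classical
    # (mean/std) estimators of location and scale
    scores = np.asarray(scores, dtype=float)
    if use_mad:
        loc = np.median(scores)
        scale = 1.4826 * np.median(np.abs(scores - loc))
    else:
        loc, scale = scores.mean(), scores.std()
    return loc + sd_th * scale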