def test_standardization(self):
        """Check ``utils.standardization`` against dense reference results.

        Two cases are exercised on ``self.n_matrix``:

        * default call -- compared with ``self.m`` shifted by its overall
          mean and divided by its overall standard deviation;
        * ``axis=0`` -- compared with the output of a
          ``preprocess.StandardScaler`` fitted on ``self.m``.

        In both cases the result is first expanded to a dense array by
        stacking the entity table next to the first attribute table indexed
        through the first key/foreign-key mapping.
        """

        def dense(normalized):
            # Expand the factorized result: entity columns followed by the
            # attribute rows selected via kfkds[0].
            joined_attrs = normalized.att_table[0][normalized.kfkds[0]]
            return np.hstack((normalized.ent_table, joined_attrs))

        # Case 1: default standardization vs. global mean / std of self.m.
        standardized = utils.standardization(self.n_matrix)
        actual = dense(standardized)
        expected = (self.m - self.m.mean()) / self.m.std()
        assert_equal(actual, expected)

        # Case 2: axis=0 standardization vs. a scaler fitted on self.m.
        standardized = utils.standardization(self.n_matrix, axis=0)
        reference_scaler = preprocess.StandardScaler()
        reference_scaler.fit(self.m)
        actual = dense(standardized)
        expected = reference_scaler.transform(self.m)
        assert_equal(actual, expected)
예제 #2
0
def edr_dist(X, Y, epsilon):
    """Return the EDR distance between two time series.

    Both inputs are validated with ``check_arrays`` and individually passed
    through ``standardization`` before the dynamic-programming routine
    ``_edr_dist`` is evaluated on them.

    :param X (array): time series feature array denoted by X
    :param Y (array): time series feature array denoted by Y
    :param epsilon (float): matching threshold
    :returns: distance between X and Y with the best alignment
    :Reference: L. Chen et al., "Robust and Fast Similarity Search for Moving Object Trajectories", 2005.
    """
    X, Y = check_arrays(X, Y)
    # Standardize each series on its own, then delegate to the DP kernel.
    return _edr_dist(standardization(X), standardization(Y), epsilon)
예제 #3
0
파일: main.py 프로젝트: Yan19960220/Rocket
def run(training_data, test_data, num_runs=10, num_kernels=100):
    """Repeatedly fit and score a ridge classifier on kernel-transformed data.

    Column 0 of each input array is read as the integer class label; the
    remaining columns are passed through ``normalization`` and then
    ``standardization`` to form the feature matrix.  Every run generates a
    fresh set of kernels, transforms both splits with them, fits the
    classifier on the training split and scores it on the test split.

    :param training_data: array with labels in column 0 and features after it
    :param test_data: array laid out like ``training_data``
    :param num_runs: number of repetitions
    :param num_kernels: number of kernels generated per repetition
    :returns: ``(results, timings)`` where ``results[i]`` is the test score of
        run ``i`` and ``timings`` has shape ``(4, num_runs)`` with rows
        training transform, test transform, training, test (seconds)
    """
    scores = np.zeros(num_runs)
    # Row order: training transform, test transform, training, test.
    elapsed = np.zeros((4, num_runs))

    Y_training = training_data[:, 0].astype(int)
    X_training = standardization(normalization(training_data[:, 1:]))
    Y_test = test_data[:, 0].astype(int)
    X_test = standardization(normalization(test_data[:, 1:]))

    for run_idx in range(num_runs):
        # Kernels are regenerated on every run; they are sized from the
        # number of feature columns.
        kernels = generate_kernels(X_training.shape[1], num_kernels)

        # Transform the training split.
        started = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        elapsed[0, run_idx] = time.perf_counter() - started

        # Transform the test split.
        started = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        elapsed[1, run_idx] = time.perf_counter() - started

        # Fit the classifier (construction is included in the timing).
        # NOTE(review): if RidgeClassifierCV is scikit-learn's, `normalize=`
        # is rejected by recent releases -- confirm the pinned version.
        started = time.perf_counter()
        classifier = RidgeClassifierCV(alphas=10 ** np.linspace(-3, 3, 10), normalize=True)
        classifier.fit(X_training_transform, Y_training)
        elapsed[2, run_idx] = time.perf_counter() - started

        # Score on the test split.
        started = time.perf_counter()
        scores[run_idx] = classifier.score(X_test_transform, Y_test)
        elapsed[3, run_idx] = time.perf_counter() - started

    return scores, elapsed
예제 #4
0
def main(args):
    """Prepare seeded, standardized data splits and the training dataset.

    The steps documented here are: seed the random generators, create the
    output directories, load and standardize the selected feature data,
    slice it into train / validation / test frames by the ``val_start`` and
    ``test_start`` labels, and build the training ``SequenceDataset``.

    :param args: options object; the attributes read here are ``seed``,
        ``weights_path``, ``plots_path``, ``numpy_path``, ``which_data``,
        ``feature`` and ``seq_length``
    """

    # Fix Seed #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Weights and Plots Path #
    paths = [args.weights_path, args.plots_path, args.numpy_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = load_data(args.which_data)[args.feature]
    # Work on a copy so the loaded object is not modified by later steps.
    data = data.copy()
    
    #df = pd.DataFrame(data)
  #  display(data)
    
    
       
    df = standardization(data)
    
    # NOTE(review): looks like leftover debug output -- confirm and remove.
    print("Hello world!")
    
    # Labels marking where the validation and test ranges begin.
    val_start = "2020-02-08 01:00:00"
    test_start = "2020-07-19 01:00:00"

    # NOTE(review): presumably df is a pandas DataFrame with a datetime index;
    # if so, label slices with .loc include both endpoints, so the rows at
    # val_start / test_start would land in two splits -- TODO confirm.
    df_train = df.loc[:val_start].copy()
    df_val = df.loc[val_start:test_start].copy()
    df_test = df.loc[test_start:].copy()
    
    
    # NOTE(review): re-seeds torch with a fixed value, replacing the
    # args.seed-based seeding done at the top of this function.
    torch.manual_seed(101)

    
    # Every column except the target column 'value' is used as a feature.
    features = list(df.columns.difference(['value']))

    # NOTE(review): `display` is not a builtin; presumably IPython's --
    # confirm it is imported when this runs outside a notebook.
    display(df_train)
    
    train_dataset = SequenceDataset(
        df_train,
        target='value',
        features=features,
        sequence_length=args.seq_length
    )