Example #1
def main():
    generator = DataGenerator(labeled_data_file=args.labeled_data_file, data_util_file=args.data_util_file,
                              threshold=args.threshold, dt=args.dt, L=args.L, tmin=args.tmin, tmax=args.tmax)
    training_data, test_data = generator.get_data(ts_nth_element=args.ts_nth_element,
                                                  training_frac=0.7)
    steps = [
        ('extract', RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                                   features=[np.mean, np.std, time_series_slope])),
        ('clf', DecisionTreeClassifier())
    ]
    time_series_tree = Pipeline(steps)
    tsf = TimeSeriesForestClassifier(
        estimator=time_series_tree,
        n_estimators=args.n_estimators,
        criterion='entropy' if args.criterion == 'entropy' else 'gini',
        bootstrap=True,
        oob_score=True,
        random_state=1,
        # n_jobs=4,
        verbose=1
    )
    x = detabularize(pd.DataFrame(training_data[:, 1:]))
    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            tsf = tsf.fit(x, training_data[:, 0])
        with open('{save_file_name}.pickle'.format(save_file_name=args.save_file_name), 'wb') \
                as TimeSeriesForestModel:
            pickle.dump(tsf, TimeSeriesForestModel, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        print(ex)
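For completeness, the forest pickled above can be reloaded the same way Example #3 reloads its model. A minimal sketch, assuming the script has already written the pickle under args.save_file_name and that new inputs are nested the same way as the training data:

import pickle

with open('{save_file_name}.pickle'.format(save_file_name=args.save_file_name),
          'rb') as model_pickle:
    tsf = pickle.load(model_pickle)
# The reloaded classifier accepts the same nested-DataFrame input,
# e.g. tsf.predict_proba(detabularize(pd.DataFrame(test_data[:, 1:]))).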
Example #2
def test_indices(n_components):
    np.random.seed(42)
    X = detabularize(pd.DataFrame(data=np.random.randn(10, 5)))
    X.columns = pd.CategoricalIndex(['col_0'])
    X.index = pd.Int64Index([i+10 for i in range(10)])

    pca = PCATransformer(n_components=n_components)
    Xt = pca.fit_transform(X)

    assert X.columns.equals(Xt.columns)
    assert X.index.equals(Xt.index)
    assert get_time_index(Xt).equals(pd.Int64Index(range(pca.pca.n_components_)))
Example #3
def main():
    L, dt = args.L, args.dt
    kdmin, kd = int(args.tmin / dt), int(args.tmax / dt) + 1

    R1, R2 = 6.00, 25.00
    C1, C2 = 2.80, 3.40
    x0, y0, z0 = 0.50, 4.00, 1.00
    NC, NL, NR = 600, 600, 600

    deltaC = (C2 - C1) / (NC - 1)
    deltaR = (R2 - R1) / (NR - 1)

    xyz_points_untrimmed = np.loadtxt(args.data_util_file, delimiter=',')
    xyz_points = np.array([[item[0], item[1], item[2]]
                           for item in xyz_points_untrimmed],
                          dtype=np.double)
    original_untrimmed = np.loadtxt(args.labeled_data_file, delimiter=',')
    cr_point_list = np.array(
        [[item[0], item[1], 0 if item[2] < args.threshold else 1]
         for item in original_untrimmed],
        dtype=np.double)
    cr_point_list = np.concatenate((cr_point_list, xyz_points), axis=1)

    training_len = 18000

    with open(
            '{model_file_name}.pickle'.format(
                model_file_name=args.model_file_name), 'rb') as model_pickle:
        model = pickle.load(model_pickle)

    for i in range(10):
        start, end = training_len * i, training_len * (i + 1)
        R = torch.from_numpy(cr_point_list[start:end, 0]).double().cpu()
        C = torch.from_numpy(cr_point_list[start:end, 1]).double().cpu()
        x0 = torch.from_numpy(cr_point_list[start:end, 3]).double().cpu()
        y0 = torch.from_numpy(cr_point_list[start:end, 4]).double().cpu()
        z0 = torch.from_numpy(cr_point_list[start:end, 5]).double().cpu()
        x = runge_kutta_4(R, C, x0, y0, z0, training_len, dt, kd, kdmin, L,
                          args.ts_nth_element)
        x = detabularize(pd.DataFrame(x))
        with parallel_backend('threading', n_jobs=args.n_jobs):
            x = model.predict_proba(x)
        with open('{out_file_name}_{i}.pickle'.format(out_file_name=args.out_file_name, i=i), 'wb') \
                as model_probabilities:
            pickle.dump(x,
                        model_probabilities,
                        protocol=pickle.HIGHEST_PROTOCOL)
        del R, C, x0, y0, z0, x
Example #4
def test_output_format_dim(len_series, n_instances, n_components):
    np.random.seed(42)
    X = detabularize(pd.DataFrame(data=np.random.randn(n_instances, len_series)))

    trans = PCATransformer(n_components=n_components)
    Xt = trans.fit_transform(X)

    # Check number of rows and output type.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Check number of principal components in the output.
    assert tabularize(Xt).shape[1] == min(n_components, tabularize(X).shape[1])
Example #5
def test_pca_results(n_components):
    np.random.seed(42)

    # sklearn
    X = pd.DataFrame(data=np.random.randn(10, 5))
    pca = PCA(n_components=n_components)
    Xt1 = pca.fit_transform(X)

    # sktime
    Xs = detabularize(X)
    pca_transform = PCATransformer(n_components=n_components)
    Xt2 = pca_transform.fit_transform(Xs)

    assert np.allclose(np.asarray(Xt1), np.asarray(tabularize(Xt2)))
Example #6
def main():
    # Load the .arff file into a tuple of size 2: the first element holds
    # the time-series data in arrays, the second the attribute descriptions.
    TRAIN = arff.loadarff('ItalyPowerDemand_TRAIN.arff')
    TEST = arff.loadarff('ItalyPowerDemand_TEST.arff')
    # Convert the data from the first tuple element to a tabularized dataframe.
    df_TRAIN = pd.DataFrame(TRAIN[0])
    df_TEST = pd.DataFrame(TEST[0])

    # Use sktime helpers to inspect the data.
    print(df_TRAIN.head())
    print('\nIs the dataframe above nested?', is_nested_dataframe(df_TRAIN), '\n')

    # Split each dataframe into features and target.
    X_train = df_TRAIN.drop('target', axis=1)
    y_train = df_TRAIN['target'].astype(int)
    print(X_train.head(), y_train.head(), '\n')
    X_test = df_TEST.drop('target', axis=1)
    y_test = df_TEST['target'].astype(int)

    # Detabularize (nest) X_train and X_test.
    X_train_detab = detabularize(X_train)
    X_test_detab = detabularize(X_test)
    print(X_train_detab.head())
    print('Is the detabularized dataframe above nested?',
          is_nested_dataframe(X_train_detab), '\n')

    # The steps above can be simplified with a single sktime helper:
    X, y = load_from_arff_to_dataframe('ItalyPowerDemand_TRAIN.arff')
    print(X_train_detab.head(), X.head(), type(y_train), type(y))

    # Fit and score a 1-NN classifier with the DTW distance.
    knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")
    knn.fit(X_train_detab, y_train)
    print('The score of the KNN classifier is:',
          round(knn.score(X_test_detab, y_test), 4))
Example #7
def main():
    generator = DataGenerator(labeled_data_file=args.labeled_data_file, data_util_file=args.data_util_file,
                              threshold=args.threshold, dt=args.dt, L=args.L, tmin=args.tmin, tmax=args.tmax)
    training_data, test_data = generator.get_data(ts_nth_element=args.ts_nth_element,
                                                  training_frac=0.7)
    knn = KNeighborsTimeSeriesClassifier(n_neighbors=args.n_neighbors, verbose=1, metric="dtw")
    x = detabularize(pd.DataFrame(training_data[:, 1:]))
    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            knn = knn.fit(x, training_data[:, 0])
        with open('{save_file_name}.pickle'.format(save_file_name=args.save_file_name), 'wb') \
                as KNeighborsTimeSeriesModel:
            pickle.dump(knn, KNeighborsTimeSeriesModel, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        print(ex)
Example #8
    def inverse_transform(self, X, y=None):
        """Transform tabular pandas dataframe into nested dataframe.

        Parameters
        ----------
        X : pandas DataFrame
            Tabular dataframe with primitives in cells.
        y : array-like, optional (default=None)

        Returns
        -------
        Xt : pandas DataFrame
            Transformed dataframe with series in cells.
        """
        self.check_is_fitted()
        X = check_X(X)
        Xt = detabularize(X, index=self._index, time_index=self._time_index)
        Xt.columns = self._columns
        return Xt
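Examples #8 and #11 are two revisions of the same inverse_transform. A round-trip sketch of how fit_transform and inverse_transform pair up, assuming the legacy sktime Tabularizer and helper import paths (these moved between sktime releases):

import numpy as np
import pandas as pd
from sktime.transformers.compose import Tabularizer   # assumed legacy path
from sktime.utils.data_container import detabularize  # assumed legacy path

# Nested frame: 3 instances, one column holding length-8 series.
X_nested = detabularize(pd.DataFrame(np.random.randn(3, 8)))

tab = Tabularizer()
X_flat = tab.fit_transform(X_nested)    # series cells -> one primitive column per time point
X_back = tab.inverse_transform(X_flat)  # flat columns -> series cells, index and columns restored
assert X_back.shape == X_nested.shape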
Example #9
    def transform(self, X, y=None):
        """Concatenate multivariate time series/panel data into long
        univiariate time series/panel
        data by simply concatenating times series in time.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells.

        Returns
        -------
        Xt : pandas DataFrame
          Transformed pandas DataFrame with the same number of rows and a
          single column.
        """
        self.check_is_fitted()
        X = check_X(X)

        # We concatenate by tabularizing all columns and then detabularizing
        # them into a single column
        return detabularize(tabularize(X))
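The comment above describes the whole mechanism: tabularize lays every column's series out side by side, and detabularize wraps each flattened row back into a single cell. A toy check, assuming the same legacy helper imports used throughout these examples:

import numpy as np
import pandas as pd
from sktime.utils.data_container import detabularize, tabularize  # assumed legacy path

# Build a 2-instance, 2-column multivariate panel with length-4 series.
col = detabularize(pd.DataFrame(np.random.randn(2, 4)))
X = pd.concat([col, col], axis=1)
X.columns = ['dim_0', 'dim_1']

Xt = detabularize(tabularize(X))  # what transform() returns
print(Xt.shape)            # (2, 1): a single column remains
print(len(Xt.iloc[0, 0]))  # 8: the two length-4 series concatenated in time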
Example #10
    def transform(self, X, y=None):
        """Concatenate multivariate time series/panel data into long univiariate time series/panel
        data by simply concatenating times series in time.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells.

        Returns
        -------
        Xt : pandas DataFrame
          Transformed pandas DataFrame with the same number of rows and a single column.
        """

        check_is_fitted(self, 'is_fitted_')

        if not isinstance(X, pd.DataFrame):
            raise ValueError(
                f"Expected input is a pandas DataFrame, but found {type(X)}")

        Xt = detabularize(tabularize(X))
        return Xt
Example #11
    def inverse_transform(self, X, y=None):
        """Transform tabular pandas dataframe into nested dataframe.

        Parameters
        ----------
        X : pandas DataFrame
            Tabular dataframe with primitives in cells.
        y : array-like, optional (default=None)

        Returns
        -------
        Xt : pandas DataFrame
            Transformed dataframe with series in cells.
        """

        check_is_fitted_in_transform(self, '_time_index')

        # TODO check if for each column, all rows have equal-index series
        if self.check_input:
            validate_X(X)

        Xit = detabularize(X, index=self._index, time_index=self._time_index)
        return Xit
Example #12
def test_pca_kwargs(kwargs):
    np.random.seed(42)
    X = detabularize(pd.DataFrame(data=np.random.randn(10, 5)))
    pca = PCATransformer(n_components=1, **kwargs)
    pca.fit_transform(X)