def load_plaid(split=None, return_X_y=True):
    """Load the PLAID time series classification problem.

    PLAID is an example of a univariate problem with unequal length series.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pd.DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Examples
    --------
    >>> from sktime.datasets import load_plaid
    >>> X, y = load_plaid()
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("PLAID", split, return_X_y)
def load_japanese_vowels(split=None, return_X_y=True):
    """Load the JapaneseVowels time series classification problem.

    JapaneseVowels is an example of a multivariate problem with unequal length
    series.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pd.DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Examples
    --------
    >>> from sktime.datasets import load_japanese_vowels
    >>> X, y = load_japanese_vowels()

    Notes
    -----
    Dimensionality:     multivariate, 12
    Series length:      7-29
    Train cases:        270
    Test cases:         370
    Number of classes:  9

    A UCI Archive dataset. 9 Japanese-male speakers were recorded saying the
    vowels 'a' and 'e'. A '12-degree linear prediction analysis' is applied to
    the raw recordings to obtain time-series with 12 dimensions and series
    lengths between 7 and 29. The classification task is to predict the
    speaker. Therefore, each instance is a transformed utterance, 12*29 values
    with a single class label attached, [1...9]. The given training set is
    comprised of 30 utterances for each speaker, however the test set has a
    varied distribution based on external factors of timing and experimental
    availability, between 24 and 88 instances per speaker.

    Reference: M. Kudo, J. Toyama and M. Shimbo. (1999). "Multidimensional
    Curve Classification Using Passing-Through Regions". Pattern Recognition
    Letters, Vol. 20, No. 11--13, pages 1103--1111.

    Dataset details:
    http://timeseriesclassification.com/description.php?Dataset=JapaneseVowels
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("JapaneseVowels", split, return_X_y)
def load_gunpoint(split=None, return_X_y=True):
    """Load the GunPoint time series classification problem.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pd.DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Examples
    --------
    >>> from sktime.datasets import load_gunpoint
    >>> X, y = load_gunpoint()

    Notes
    -----
    Dimensionality:     univariate
    Series length:      150
    Train cases:        50
    Test cases:         150
    Number of classes:  2

    This dataset involves one female actor and one male actor making a motion
    with their hand. The two classes are Gun-Draw and Point.

    For Gun-Draw the actors have their hands by their sides. They draw a
    replicate gun from a hip-mounted holster, point it at a target for
    approximately one second, then return the gun to the holster, and their
    hands to their sides.

    For Point the actors have their gun by their sides. They point with their
    index fingers to a target for approximately one second, and then return
    their hands to their sides.

    For both classes, we tracked the centroid of the actor's right hands in
    both X- and Y-axes, which appear to be highly correlated. The data in the
    archive is just the X-axis.

    Dataset details:
    http://timeseriesclassification.com/description.php?Dataset=GunPoint
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("GunPoint", split, return_X_y)
def load_acsf1(split=None, return_X_y=True):
    """Load the ACSF1 dataset on power consumption of typical appliances.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pd.DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Examples
    --------
    >>> from sktime.datasets import load_acsf1
    >>> X, y = load_acsf1()

    Notes
    -----
    Dimensionality:     univariate
    Series length:      1460
    Train cases:        100
    Test cases:         100
    Number of classes:  10

    The dataset contains the power consumption of typical appliances. The
    recordings are characterized by long idle periods and some high bursts of
    energy consumption when the appliance is active.

    The classes correspond to 10 categories of home appliances: mobile phones
    (via chargers), coffee machines, computer stations (including monitor),
    fridges and freezers, Hi-Fi systems (CD players), lamp (CFL), laptops (via
    chargers), microwave ovens, printers, and televisions (LCD or LED).

    Dataset details:
    http://www.timeseriesclassification.com/description.php?Dataset=ACSF1
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("ACSF1", split, return_X_y)
def load_arrow_head(split=None, return_X_y=True):
    """Load the ArrowHead time series classification problem.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pandas DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Notes
    -----
    Dimensionality:     univariate
    Series length:      251
    Train cases:        36
    Test cases:         175
    Number of classes:  3

    The arrowhead data consists of outlines of the images of arrowheads. The
    shapes of the projectile points are converted into a time series using the
    angle-based method. The classification of projectile points is an
    important topic in anthropology. The classes are based on shape
    distinctions such as the presence and location of a notch in the arrow.
    The problem in the repository is a length normalised version of that used
    in Ye09shapelets. The three classes are called "Avonlea", "Clovis" and
    "Mix".

    Dataset details:
    http://timeseriesclassification.com/description.php?Dataset=ArrowHead
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("ArrowHead", split, return_X_y)
def load_osuleaf(split=None, return_X_y=True):
    """Load the OSULeaf time series classification problem.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pd.DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Examples
    --------
    >>> from sktime.datasets import load_osuleaf
    >>> X, y = load_osuleaf()

    Notes
    -----
    Dimensionality:     univariate
    Series length:      427
    Train cases:        200
    Test cases:         242
    Number of classes:  6

    The OSULeaf data set consists of one dimensional outlines of leaves. The
    series were obtained by color image segmentation and boundary extraction
    (in the anti-clockwise direction) from digitized leaf images of six
    classes: Acer Circinatum, Acer Glabrum, Acer Macrophyllum, Acer Negundo,
    Quercus Garryana and Quercus Kelloggii, for the MSc thesis "Content-Based
    Image Retrieval: Plant Species Identification" by A Grandhi.

    Dataset details:
    http://www.timeseriesclassification.com/description.php?Dataset=OSULeaf
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("OSULeaf", split, return_X_y)
def load_UCR_UEA_dataset(name, split=None, return_X_y=True, extract_path=None):
    """Load dataset from UCR UEA time series archive.

    Downloads and extracts dataset if not already downloaded. Data is assumed
    to be in the standard .ts format: each row is a (possibly multivariate)
    time series. Each dimension is separated by a colon, each value in a
    series is comma separated. For examples see sktime.datasets.data.tsc.
    ArrowHead is an example of a univariate equal length problem, BasicMotions
    an equal length multivariate problem.

    Parameters
    ----------
    name : str
        Name of data set. If a dataset that is listed in tsc_dataset_names is
        given, this function will look in the extract_path first, and if it is
        not present, attempt to download the data from
        www.timeseriesclassification.com, saving it to the extract_path.
    split : None or str{"train", "test"}, optional (default=None)
        Whether to load the train or test partition of the problem. By default
        it loads both into a single dataset, otherwise it looks only for files
        of the format <name>_TRAIN.ts or <name>_TEST.ts.
    return_X_y : bool, optional (default=True)
        If True, it returns two objects (X and y); if False, it appends the
        class labels to the dataframe.
    extract_path : str, optional (default=None)
        The path to look for the data. If no path is provided, the function
        looks in `sktime/datasets/data/`.

    Returns
    -------
    X : pd.DataFrame
        The time series data for the problem with n_cases rows and either
        n_dimensions or n_dimensions+1 columns. Columns 1 to n_dimensions are
        the series associated with each case. If return_X_y is False, column
        n_dimensions+1 contains the class labels/target variable.
    y : numpy array, optional
        The class labels for each case in X, returned separately if return_X_y
        is True, or appended to X if False.

    Examples
    --------
    >>> from sktime.datasets import load_UCR_UEA_dataset
    >>> X, y = load_UCR_UEA_dataset(name="Yoga")
    """
    # Thin public wrapper: all arguments are forwarded unchanged.
    return _load_dataset(name, split, return_X_y, extract_path)
def load_italy_power_demand(split=None, return_X_y=True):
    """Load the ItalyPowerDemand time series classification problem.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pd.DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Examples
    --------
    >>> from sktime.datasets import load_italy_power_demand
    >>> X, y = load_italy_power_demand()

    Notes
    -----
    Dimensionality:     univariate
    Series length:      24
    Train cases:        67
    Test cases:         1029
    Number of classes:  2

    The data was derived from twelve monthly electrical power demand time
    series from Italy and first used in the paper "Intelligent Icons:
    Integrating Lite-Weight Data Mining and Visualization into GUI Operating
    Systems". The classification task is to distinguish days from Oct to March
    (inclusive) from April to September.

    Dataset details:
    http://timeseriesclassification.com/description.php?Dataset=ItalyPowerDemand
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("ItalyPowerDemand", split, return_X_y)
def load_basic_motions(split=None, return_X_y=True):
    """Load the BasicMotions time series classification problem.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pandas DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Notes
    -----
    Dimensionality:     multivariate, 6
    Series length:      100
    Train cases:        40
    Test cases:         40
    Number of classes:  4

    The data was generated as part of a student project where four students
    performed four activities whilst wearing a smart watch. The watch collects
    3D accelerometer and a 3D gyroscope. It consists of four classes, which
    are walking, resting, running and badminton. Participants were required to
    record motion a total of five times, and the data is sampled once every
    tenth of a second, for a ten second period.

    Dataset details:
    http://www.timeseriesclassification.com/description.php?Dataset=BasicMotions
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("BasicMotions", split, return_X_y)
def load_unit_test(split=None, return_X_y=True):
    """Load the UnitTest time series classification problem.

    This problem is a stripped down version of the ChinaTown problem that is
    used in correctness tests for classification.

    Parameters
    ----------
    split : None or str{"train", "test"}, optional (default=None)
        Which partition of the problem to load. When None, both the train and
        test partitions are loaded.
    return_X_y : bool, optional (default=True)
        If True, the features and the target are returned separately as
        (X, y) rather than as a single dataframe holding both.

    Returns
    -------
    X : pandas DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions.
    y : numpy array
        The class labels for each case in X.

    Notes
    -----
    This is the Chinatown problem with a smaller test set, useful for rapid
    tests. See
    http://timeseriesclassification.com/description.php?Dataset=Chinatown
    for the full dataset.

    Dimensionality:     univariate
    Series length:      24
    Train cases:        20
    Test cases:         22 (full dataset has 345)
    Number of classes:  2
    """
    # The loading itself is delegated to the module's common loader.
    return _load_dataset("UnitTest", split, return_X_y)