def load(self):
    """Load the train and test files and return them as one dataframe.

    Each split is read with ``load_from_tsfile_to_dataframe``, its target
    values are appended as the last column (named ``self._target_name``),
    and the two splits are stacked row-wise.

    Returns
    -------
    data : pd.DataFrame
        Stacked train and test rows. The index holds the labels
        ``"train"`` / ``"test"`` so the original split can be recovered
        later (e.g. for optional cross-validation).
    """
    frames = []
    for split_path in (self._train_path, self._test_path):
        # features and target come back separately from the loader
        features, target = load_from_tsfile_to_dataframe(
            split_path, return_separate_X_and_y=True)

        # glue the target on as the right-most column and give it its name
        frame = pd.concat([features, pd.Series(target)], axis=1)
        frame = frame.rename(columns={frame.columns[-1]: self._target_name})
        frames.append(frame)

    # the keys become the outer index level; the inner (per-split row
    # number) level is dropped so only the train/test label remains
    stacked = pd.concat(frames, axis=0, keys=["train", "test"])
    return stacked.reset_index(level=1, drop=True)
def test_write_dataframe_to_ts_success(tmp_path, dataset):
    """Check that a dataset survives a write/read round trip through .ts.

    The bundled ``<dataset>_TEST.ts`` file is loaded, written back out with
    ``write_dataframe_to_tsfile`` and re-loaded; the re-loaded features must
    equal the ones originally loaded.
    """
    # locate and load the example dataset shipped inside the sktime package
    package_dir = os.path.dirname(sktime.__file__)
    source_file = os.path.join(
        package_dir,
        f"datasets/data/{dataset}/{dataset}_TEST.ts",
    )
    expected_X, expected_y = load_from_tsfile_to_dataframe(source_file)

    # free-text description stored in the header of the written file
    description = """ The data was derived from twelve monthly electrical power demand time series from Italy and first used in the paper "Intelligent Icons: Integrating Lite-Weight Data Mining and Visualization into GUI Operating Systems". The classification task is to distinguish days from Oct to March (inclusive) from April to September. """  # noqa: E501

    # write the features and class values out as a .ts file
    write_dataframe_to_tsfile(
        data=expected_X,
        path=tmp_path,
        problem_name=dataset,
        class_label=np.unique(expected_y),
        class_value_list=expected_y,
        comment=description,
        fold="_transform",
    )

    # read the freshly written file back in
    written_file = f"{tmp_path}/{dataset}/{dataset}_transform.ts"
    actual_X, _ = load_from_tsfile_to_dataframe(written_file)

    # the round trip must not alter the feature frame
    assert_frame_equal(actual_X, expected_X)
def read_ts(
    filepath: str, **kwargs
) -> Tuple[Union[DenseFunctionalData, IrregularFunctionalData], np.ndarray]:
    """Read a ts file into Functional Data.

    Build a DenseFunctionalData or IrregularFunctionalData object upon a ts
    file passed as parameter.

    Notes
    -----
    The data are assumed to be unidimensional; this is not checked. Only the
    ``'dim_0'`` column is inspected to choose between the dense and the
    irregular representation.

    Parameters
    ----------
    filepath: str
        Any valid string path is acceptable.
    **kwargs:
        Keyword arguments passed to the load_from_tsfile_to_dataframe
        function.

    Returns
    -------
    obj: DenseFunctionalData or IrregularFunctionalData
        The loaded ts file. Dense when every series in ``'dim_0'`` has the
        same length, irregular otherwise.
    labels: np.ndarray
        Labels

    """
    data, labels = load_from_tsfile_to_dataframe(filepath, **kwargs)

    # Length of every series in the first dimension. Series.map is used
    # instead of DataFrame.applymap, which is deprecated in recent pandas
    # and needlessly measured every column.
    len_argavals = data['dim_0'].map(len).unique()

    # A single distinct length means all curves share one sampling grid size.
    if len(len_argavals) == 1:
        obj = read_ts_dense(data)
    else:
        obj = read_ts_irregular(data)
    return obj, labels
def _with_ts_file(file_contents, action):
    """Write ``file_contents`` to a temporary file and run ``action`` on it.

    Parameters
    ----------
    file_contents : str
        Text written verbatim to the temporary file.
    action : callable
        Called with the path of the temporary file, after the contents have
        been flushed and while the writing handle is still open.

    Returns
    -------
    The return value of ``action``. The temporary file is removed afterwards
    whether or not ``action`` raises.
    """
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:
            tmp_file.write(file_contents)
            # flush so the contents are on disk before the file is re-read
            tmp_file.flush()
            return action(path)
    finally:
        os.remove(path)


def _load_ts_contents(file_contents):
    """Parse ``file_contents`` as a .ts file and return the loader's result."""
    return _with_ts_file(file_contents, load_from_tsfile_to_dataframe)


def _assert_ts_contents_invalid(file_contents):
    """Assert that parsing ``file_contents`` as a .ts file raises IOError."""

    def parse_must_fail(path):
        # only the parser call is wrapped, so errors from creating or
        # removing the temporary file are never mistaken for a parse failure
        np.testing.assert_raises(IOError, load_from_tsfile_to_dataframe, path)

    _with_ts_file(file_contents, parse_must_fail)


def _assert_frame_matches(df, expected):
    """Assert that the nested dataframe ``df`` holds exactly ``expected``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame returned by the loader; each cell is a series.
    expected : dict
        Maps a column name to a list with one entry per case (row). An entry
        is either a list of values, compared against ``series[0]``,
        ``series[1]``, ..., or a dict mapping an index key to the value
        expected at ``series[key]``. The number of entries fixes the expected
        length of the cell's series.
    """
    n_cases = len(next(iter(expected.values())))
    np.testing.assert_equal(len(df), n_cases)
    np.testing.assert_equal(len(df.columns), len(expected))

    for dim, cells in expected.items():
        np.testing.assert_equal(len(df[dim]), n_cases)
        for case, cell in enumerate(cells):
            series = df[dim][case]
            np.testing.assert_equal(len(series), len(cell))
            if isinstance(cell, dict):
                pairs = cell.items()
            else:
                pairs = enumerate(cell)
            for key, value in pairs:
                np.testing.assert_equal(series[key], value)


def test_load_from_tsfile_to_dataframe():
    """Test the load_from_tsfile_to_dataframe() function.

    Each scenario writes a small .ts document to a temporary file and either
    expects the loader to raise IOError or checks every cell of the returned
    dataframe (and the class values, where present).
    """
    # Metadata headers shared by several scenarios
    header = (
        "@problemName Test Problem\n@timeStamps "
        "true\n@univariate true\n@classLabel "
        "false\n@data\n"
    )
    header_classes = (
        "@problemName Test Problem\n@timeStamps "
        "true\n@univariate true\n@classLabel true 0 1 "
        "2\n@data\n"
    )
    header_no_timestamps = (
        "@problemName Test Problem\n@timeStamps "
        "false\n@univariate true\n@classLabel "
        "false\n@data\n"
    )
    header_no_timestamps_classes = (
        "@problemName Test Problem\n@timeStamps "
        "false\n@univariate true\n@classLabel true cat "
        "bear dog\n@data\n"
    )

    # Expected contents shared by several scenarios
    two_cases_three_dims = {
        "dim_0": [[1.0, 2.0], [11.0, 12.0]],
        "dim_1": [[3.0, 4.0], [13.0, 14.0]],
        "dim_2": [[5.0, 6.0], [15.0, 16.0]],
    }
    no_timestamps_some_empty = {
        "dim_0": [[1.0, 2.0], [11.0, 12.0], [21.0, 22.0]],
        "dim_1": [[], [13.0, 14.0], [23.0, 24.0]],
        "dim_2": [[5.0, 6.0], [15.0, 16.0], []],
    }

    # Data lines with datetime timestamps, shared by two scenarios
    datetime_case_2019 = (
        "(01/01/2019 00:00:00, 1), (01/02/2019 "
        "00:00:00, 2) : "
        " : (01/05/2019 00:00:00, "
        "5), (01/06/2019 00:00:00, 6)\n"
    )
    datetime_case_2021 = (
        "(01/01/2021 00:00:00, 21), (01/02/2021 "
        "00:00:00, 22) : (01/03/2021 00:00:00, 23), "
        "(01/04/2021 00:00:00, 24) : \n"
    )

    # Test that an empty file is classed an invalid
    _assert_ts_contents_invalid("")

    # Test that a file with an incomplete set of metadata is invalid
    _assert_ts_contents_invalid(
        "@problemName Test Problem\n@timeStamps "
        "true\n@univariate true\n"
    )

    # Test that a file with a complete set of metadata but no data is invalid
    _assert_ts_contents_invalid(
        "@problemName Test Problem\n@timeStamps "
        "true\n@univariate true\n@classLabel false\n@data"
    )

    # Test that a file with a complete set of metadata and no data but
    # invalid metadata values is invalid
    _assert_ts_contents_invalid(
        "@problemName\n@timeStamps\n@univariate "
        "true\n@classLabel false\n@data"
    )

    # Test that a file with a complete set of metadata and a single
    # case/dimension parses correctly
    df = _load_ts_contents(header + "(0, 1), (1, 2)")
    _assert_frame_matches(df, {"dim_0": [[1.0, 2.0]]})

    # Test that a file with a complete set of metadata and 2 cases with 3
    # dimensions parses correctly
    file_contents = header
    file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6)\n"
    file_contents += "(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, 16) \n"
    df = _load_ts_contents(file_contents)
    _assert_frame_matches(df, two_cases_three_dims)

    # Test that a file with a complete set of metadata and time-series of
    # different length parses correctly
    file_contents = header
    file_contents += "(0, 1), (1, 2):(0, 3):(0, 5), (1, 6)\n"
    file_contents += "(0, 11), (1, 12):(0, 13), (1,14):(0, 15)\n"
    df = _load_ts_contents(file_contents)
    _assert_frame_matches(
        df,
        {
            "dim_0": [[1.0, 2.0], [11.0, 12.0]],
            "dim_1": [[3.0], [13.0, 14.0]],
            "dim_2": [[5.0, 6.0], [15.0]],
        },
    )

    # Test that a file with a complete set of metadata and data but an
    # inconsistent number of dimensions across cases is classed as invalid
    file_contents = header
    file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6)\n"
    file_contents += "(0, 11), (1, 12):(0, 13), (1,14) \n"
    _assert_ts_contents_invalid(file_contents)

    # Test that a file with a complete set of metadata and data but missing
    # values after a tuple is classed as invalid
    file_contents = header
    file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5),\n"
    _assert_ts_contents_invalid(file_contents)

    # Test that a file with a complete set of metadata and data and some
    # empty dimensions is classed as valid
    file_contents = header
    file_contents += "(0, 1), (1, 2): :(0, 5), (1, 6)\n"
    file_contents += "(0, 11), (1, 12):(0, 13), (1,14) : \n"
    file_contents += (
        "(0, 21), (1, 22):(0, 23), (1,24) : (0,25), (1, 26) \n"
    )
    df = _load_ts_contents(file_contents)
    _assert_frame_matches(
        df,
        {
            "dim_0": [[1.0, 2.0], [11.0, 12.0], [21.0, 22.0]],
            "dim_1": [[], [13.0, 14.0], [23.0, 24.0]],
            "dim_2": [[5.0, 6.0], [], [25.0, 26.0]],
        },
    )

    # Test that a file with a complete set of metadata and data that
    # contains datetimes as timestamps and has some empty dimensions is
    # classed as valid
    file_contents = header
    file_contents += datetime_case_2019
    file_contents += (
        "(01/01/2020 00:00:00, 11), (01/02/2020 "
        "00:00:00, 12) : (01/03/2020 00:00:00, 13), "
        "(01/04/2020 00:00:00, 14) : \n"
    )
    file_contents += datetime_case_2021
    df = _load_ts_contents(file_contents)
    _assert_frame_matches(
        df,
        {
            "dim_0": [
                {"01/01/2019": 1.0, "01/02/2019": 2.0},
                {"01/01/2020": 11.0, "01/02/2020": 12.0},
                {"01/01/2021": 21.0, "01/02/2021": 22.0},
            ],
            "dim_1": [
                {},
                {"01/03/2020": 13.0, "01/04/2020": 14.0},
                {"01/03/2021": 23.0, "01/04/2021": 24.0},
            ],
            "dim_2": [
                {"01/05/2019": 5.0, "01/06/2019": 6.0},
                {},
                {},
            ],
        },
    )

    # Test that a file that mixes timestamp conventions is invalid
    file_contents = header
    file_contents += datetime_case_2019
    file_contents += (
        "(00, 11), (1, 12) : (01/03/2020 00:00:00, 13), "
        "(01/04/2020 00:00:00, 14) : \n"
    )
    file_contents += datetime_case_2021
    _assert_ts_contents_invalid(file_contents)

    # Test that a file with a complete set of metadata and data but missing
    # classes is classed as invalid
    file_contents = header_classes
    file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6)\n"
    file_contents += "(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, 16) \n"
    _assert_ts_contents_invalid(file_contents)

    # Test that a file with a complete set of metadata and data but invalid
    # classes is classed as invalid
    file_contents = header_classes
    file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6) : 0 \n"
    file_contents += (
        "(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, 16) : 3 \n"
    )
    _assert_ts_contents_invalid(file_contents)

    # Test that a file with a complete set of metadata and data with classes
    # is classed as valid
    file_contents = header_classes
    file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6): 0\n"
    file_contents += (
        "(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, 16): 2 \n"
    )
    df, y = _load_ts_contents(file_contents)
    _assert_frame_matches(df, two_cases_three_dims)
    # Test that the class values are as expected
    np.testing.assert_equal(len(y), 2)
    np.testing.assert_equal(y[0], "0")
    np.testing.assert_equal(y[1], "2")

    # Test that a file with a complete set of metadata and data, with no
    # timestamps, is classed as valid
    file_contents = header_no_timestamps
    file_contents += "1,2:3,4:5,6\n"
    file_contents += "11,12:13,14:15,16\n"
    file_contents += "21,22:23,24:25,26\n"
    df = _load_ts_contents(file_contents)
    _assert_frame_matches(
        df,
        {
            "dim_0": [[1.0, 2.0], [11.0, 12.0], [21.0, 22.0]],
            "dim_1": [[3.0, 4.0], [13.0, 14.0], [23.0, 24.0]],
            "dim_2": [[5.0, 6.0], [15.0, 16.0], [25.0, 26.0]],
        },
    )

    # Test that a file with a complete set of metadata and data, with no
    # timestamps and some empty dimensions, is classed as valid
    file_contents = header_no_timestamps
    file_contents += "1,2::5,6\n"
    file_contents += "11,12:13,14:15,16\n"
    file_contents += "21,22:23,24:\n"
    df = _load_ts_contents(file_contents)
    _assert_frame_matches(df, no_timestamps_some_empty)

    # Test that a file with a complete set of metadata and data, with no
    # timestamps and some empty dimensions and classes, is classed as valid
    file_contents = header_no_timestamps_classes
    file_contents += "1,2::5,6:cat \n"
    file_contents += "11,12:13,14:15,16: dog\n"
    file_contents += "21,22:23,24:: bear \n"
    df, y = _load_ts_contents(file_contents)
    _assert_frame_matches(df, no_timestamps_some_empty)
    # Test that the class values are as expected
    np.testing.assert_equal(len(y), 3)
    np.testing.assert_equal(y[0], "cat")
    np.testing.assert_equal(y[1], "dog")
    np.testing.assert_equal(y[2], "bear")