def demo_loading():
    """Smoke-test loading of the univariate and multivariate problems.

    Every name in ``dataset_lists.univariate`` is loaded from ``E:/tsc_ts/``,
    then the names in ``dataset_lists.multivariate`` from position 16 onwards
    are loaded from ``E:/mtsc_ts/``. For each problem the ``_TRAIN.ts`` and
    ``_TEST.ts`` files are read with ``load_ts`` and the shapes of the returned
    data and labels are printed.
    """

    def _load_split(root, name):
        # Files live at <root><name>/<name>_TRAIN.ts and <root><name>/<name>_TEST.ts
        stem = root + name + "/" + name
        train_x, train_y = load_ts(stem + "_TRAIN.ts")
        test_x, test_y = load_ts(stem + "_TEST.ts")
        return train_x, train_y, test_x, test_y

    def _print_shapes(train_x, train_y, test_x, test_y):
        # Header line followed by the shape, in a fixed order
        for header, values in (
            ("Train X shape :", train_x),
            ("Train Y shape :", train_y),
            ("Test X shape :", test_x),
            ("Test Y shape :", test_y),
        ):
            print(header)
            print(values.shape)

    for position, name in enumerate(dataset_lists.univariate):
        split = _load_split("E:/tsc_ts/", name)
        print("Loaded " + name + " in position " + str(position))
        _print_shapes(*split)

    # NOTE(review): the multivariate pass starts at index 16, not 0 — the
    # reason is not visible here; confirm whether skipping is still intended.
    for position in range(16, len(dataset_lists.multivariate)):
        name = dataset_lists.multivariate[position]
        print("Loading " + name + " in position " + str(position) + ".......")
        split = _load_split("E:/mtsc_ts/", name)
        print("Loaded " + name)
        _print_shapes(*split)
else: # Local run print(" Local Run") data_dir = "../datasets/data/" results_dir = "C:/Temp/Clusterers/" dataset = "UnitTest" clusterer = "kmeans" resample = 0 tf = True clst = TimeSeriesKMeans(n_clusters=2) load_and_run_clustering_experiment( overwrite=True, problem_path=data_dir, results_path=results_dir, cls_name=clusterer, dataset=dataset, resample_id=resample, train_file=tf, clusterer=clst, ) train_X, train_Y = load_ts(data_dir + dataset + "/" + dataset + "_TRAIN.ts") test_X, test_Y = load_ts(data_dir + dataset + "/" + dataset + "_TEST.ts") run_clustering_experiment( train_X, clst, results_path=results_dir + "Temp/", trainY=train_Y, testX=test_X, testY=test_Y, cls_name=clusterer, )
def load_and_run_clustering_experiment(
    problem_path,
    results_path,
    dataset,
    clusterer,
    resample_id=0,
    cls_name=None,
    overwrite=False,
    format=".ts",
    train_file=False,
):
    """Load a dataset from file and run a clustering experiment.

    Reads ``<dataset>_TRAIN`` and ``<dataset>_TEST`` from ``problem_path``,
    optionally resamples them, label-encodes the class values (the encoder is
    fitted on the train labels) and passes everything to
    ``run_clustering_experiment`` with the train data as the primary input.

    When ``overwrite`` is False, existing result files
    ``<results_path>/<cls_name>/Predictions/<dataset>/testResample<resample_id>.csv``
    and, if ``train_file`` is set, ``trainResample<resample_id>.csv`` are
    looked up first. If nothing is left to build the function returns without
    loading any data.

    Parameters
    ----------
    problem_path : str
        Location of problem files, full path. It is concatenated directly
        with ``dataset``, so it must end with a path separator.
    results_path : str
        Location of where to write results.
    dataset : str
        Name of problem. Files must be
        <problem_path><dataset>/<dataset>+"_TRAIN"+format, same for "_TEST".
    clusterer : the clusterer
        Passed through to ``run_clustering_experiment``.
    resample_id : int, default = 0
        Seed for resampling. If set to 0, the default train/test split from
        file is used, otherwise ``stratified_resample`` is applied. Also used
        in the name of the result files that are checked.
    cls_name : str, default = None
        Determines what to call the write directory. If None, it is set to
        type(clusterer).__name__.
    overwrite : boolean, default = False
        If False, results are only built when a result file is not already
        present. If True, no existing files are checked.
    format : string, default = ".ts"
        Extension appended to the file names. The data is always read with
        ``load_ts``, so currently only ".ts" works.
    train_file : boolean, default = False
        Whether train results are wanted. In this function it only influences
        the "are results already present" check; it is not forwarded to
        ``run_clustering_experiment``.

    Returns
    -------
    None
    """
    if cls_name is None:
        cls_name = type(clusterer).__name__

    # Test results are built unless an existing file says otherwise. Without
    # this default the check below raised UnboundLocalError whenever
    # overwrite was False and no test results file existed yet.
    build_test = True
    if not overwrite:
        # Set up the file paths in standard format
        predictions_dir = (
            str(results_path)
            + "/"
            + str(cls_name)
            + "/Predictions/"
            + str(dataset)
        )
        file_suffix = str(resample_id) + ".csv"
        if os.path.exists(predictions_dir + "/testResample" + file_suffix):
            build_test = False
        if train_file and os.path.exists(
            predictions_dir + "/trainResample" + file_suffix
        ):
            train_file = False
        if train_file is False and not build_test:
            # Everything requested is already on disk
            return

    # currently only works with .ts
    data_stem = problem_path + dataset + "/" + dataset
    trainX, trainY = load_ts(data_stem + "_TRAIN" + format)
    testX, testY = load_ts(data_stem + "_TEST" + format)
    if resample_id != 0:
        trainX, trainY, testX, testY = stratified_resample(
            trainX, trainY, testX, testY, resample_id
        )

    # Map class labels to integers; the mapping is learned from the train
    # labels only and then applied to both splits.
    le = preprocessing.LabelEncoder()
    le.fit(trainY)
    trainY = le.transform(trainY)
    testY = le.transform(testY)
    run_clustering_experiment(
        trainX,
        clusterer,
        trainY=trainY,
        testX=testX,
        testY=testY,
        cls_name=cls_name,
        dataset_name=dataset,
        results_path=results_path,
    )
def load_and_run_classification_experiment(
    problem_path,
    results_path,
    dataset,
    classifier,
    resample_id=0,
    cls_name=None,
    overwrite=False,
    build_train=False,
    predefined_resample=False,
):
    """Load a dataset and run a classification experiment.

    Reads the train and test ``.ts`` files for ``dataset``, optionally
    resamples them, and delegates to ``run_classification_experiment``.
    Unless ``overwrite`` is set, the result files
    ``<results_path>/<cls_name>/Predictions/<dataset>/testResample<resample_id>.csv``
    and (when ``build_train`` is set) ``trainResample<resample_id>.csv`` are
    checked first; if everything requested already exists the function
    returns without loading data.

    Parameters
    ----------
    problem_path : str
        Location of problem files, full path. Concatenated directly with
        ``dataset``, so it must end with a path separator.
    results_path : str
        Location of where to write results.
    dataset : str
        Name of problem. Files must be
        <problem_path><dataset>/<dataset>+"_TRAIN.ts", same for "_TEST".
    classifier : BaseClassifier
        Classifier passed on to ``run_classification_experiment``.
    resample_id : int, default=0
        Seed for resampling. If set to 0, the default train/test split from
        file is used. Also used in the result file names.
    cls_name : str, default=None
        Name of classifier used in the results path. If None, it is taken
        from ``type(classifier).__name__``.
    overwrite : bool, default=False
        If False, results are only built when the corresponding result file
        is not already present. If True, existing files are not checked.
    build_train : bool, default=False
        Whether train results are wanted; forwarded as ``train_file``.
    predefined_resample : bool, default=False
        Read a predefined resample from file instead of performing a
        resample. If True the file names must include the resample_id after
        the dataset name, i.e.
        <problem_path><dataset>/<dataset>+<resample_id>+"_TRAIN.ts".
    """
    if cls_name is None:
        cls_name = type(classifier).__name__

    # Work out which result files still need producing; leave if none do
    build_test = True
    if not overwrite:
        predictions_stem = results_path + "/" + cls_name + "/Predictions/" + dataset
        results_suffix = str(resample_id) + ".csv"
        build_test = not os.path.exists(
            predictions_stem + "/testResample" + results_suffix
        )
        if build_train and os.path.exists(
            predictions_stem + "/trainResample" + results_suffix
        ):
            build_train = False
        if not build_test and build_train is False:
            return

    # Predefined resamples carry the resample id in the file name
    data_stem = problem_path + dataset + "/" + dataset
    if predefined_resample:
        data_stem = data_stem + str(resample_id)
    train_data, train_labels = load_ts(data_stem + "_TRAIN.ts")
    test_data, test_labels = load_ts(data_stem + "_TEST.ts")

    # Only the default split is resampled here, and only for a non-zero seed
    if not predefined_resample and resample_id != 0:
        train_data, train_labels, test_data, test_labels = stratified_resample(
            train_data, train_labels, test_data, test_labels, resample_id
        )

    run_classification_experiment(
        train_data,
        train_labels,
        test_data,
        test_labels,
        classifier,
        results_path,
        cls_name=cls_name,
        dataset=dataset,
        resample_id=resample_id,
        train_file=build_train,
        test_file=build_test,
    )
path = "C:/Users/chris/Documents/Masters" data_dir = os.path.abspath(f"{path}/datasets/Univariate_ts/") results_dir = os.path.abspath(f"{path}/results/") dataset = "ElectricDevices" resample = 2 tf = True distance = "msm" else: # Local run print(" Local Run") dataset = "ElectricDevices" data_dir = f"../datasets/data/" results_dir = "./temp" resample = 0 tf = True distance = "msm" train_X, train_Y = load_ts(f"{data_dir}/{dataset}/{dataset}_TRAIN.ts", return_data_type="numpy2d") test_X, test_Y = load_ts(f"{data_dir}/{dataset}/{dataset}_TEST.ts", return_data_type="numpy2d") from sklearn.preprocessing import StandardScaler s = StandardScaler() train_X = s.fit_transform(train_X.T) train_X = train_X.T test_X = s.fit_transform(test_X.T) test_X = test_X.T if tune: window = tune_window(distance, train_X) name = clusterer + "-" + distance + "-tuned" else: name = clusterer + "-" + distance if (distance == "wdtw" or distance == "dwdtw" or distance == "dtw"