def create_classifier(self):
    # Lazily fetch a single dataset if none has been prepared yet.
    if self.datasets is None:
        Print.info("Fetching dataset")
        self.datasets = list()
        ds = Session.full_dataset(window_length=self.window_length)
        ds = ds.reduced_dataset(self.dataset_type)
        ds = ds.normalize()
        ds.shuffle()
        self.datasets.append(ds)

    pipeline = self.create_pipeline()
    Print.data(pipeline)

    # Train on a random split of the first dataset and report test accuracy.
    ds = self.datasets[0]
    ds_train, ds_test = ds.split_random()
    pipeline.fit(ds_train.X, ds_train.y)
    accuracy = pipeline.score(ds_test.X, ds_test.y)
    Print.info("Accuracy: {}".format(accuracy))
    return pipeline
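# Usage sketch (illustrative, not part of this module): the returned pipeline
# exposes a scikit-learn-style fit/score interface, so, assuming it also
# implements ``predict``, it can classify new preprocessed windows.
# ``experiment`` and ``ds_new`` below are hypothetical names:
#
#     clf = experiment.create_classifier()
#     y_pred = clf.predict(ds_new.X)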
def run(self):
    print("\n\n")
    Print.time("Running Experiment {}".format(
        "" if self.index is None else self.index))
    start_time = time.time()
    try:
        # Lazily fetch one dataset per cross-validation split.
        if self.datasets is None:
            self.datasets = list()
            for i in tqdm(range(self.cv_splits), desc="Fetching Datasets"):
                ds = Session.full_dataset(window_length=self.window_length)
                ds = ds.reduced_dataset(self.dataset_type)
                ds = ds.normalize()
                ds.shuffle()
                self.datasets.append(ds)

        # Cross-validate, either in parallel or sequentially.
        if self.multiprocessing:
            self.run_multi()
        else:
            for ds in tqdm(self.datasets, desc="Cross validating"):
                self.cv_reports.append(self.run_cv(ds))
        self.report["success"] = True
    except Exception as e:
        print("")
        Print.warning("Skipping experiment: {}".format(e))
        Print.ex(e)
        self.report["success"] = False
        return

    # Aggregate the per-split reports into the experiment report.
    self.report = {**self.report, **avg_dict(self.cv_reports)}
    self.report["confusion_matrix"] = np.sum(
        [r["confusion_matrix"] for r in self.cv_reports], 0)
    self.report["time"]["exp"] = (time.time() - start_time)
    self.report["accuracies"] = [r["accuracy"] for r in self.cv_reports]
    self.report["cv_splits"] = self.cv_splits
    # self.report["feature_vector_length"] = self.feature_vector_length()
    self.report["dataset_lengths"] = [d.length for d in self.datasets]
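# Usage sketch (illustrative only): the snippet below assumes these methods
# belong to an ``Experiment`` class whose constructor accepts the attributes
# referenced above (window_length, dataset_type, cv_splits, multiprocessing);
# the class name, constructor signature, and argument values are assumptions,
# not taken from this file.
#
#     experiment = Experiment(window_length=128,
#                             dataset_type="default",
#                             cv_splits=5,
#                             multiprocessing=False)
#     experiment.run()
#     Print.data(experiment.report["accuracies"])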