def generate_reproduction(self):
    fn = self.filename("exp_set_reproduction", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])
    res = "# Experiment Set Reproduction\n"
    res += "## Code\n"
    res += "```\n"
    code = "params = "
    code += json.dumps(self.exp_set.reproduction_params(), indent=4) + "\n\n"
    code += "exp_set = ExperimentSet(cv_splits={}, **params)\n".format(
        self.exp_set.cv_splits)
    code += "exp_set.multiprocessing = \"cv\"\n"
    code += "exp_set.run_experiments()"
    res += code + "\n"
    res += "```\n\n"
    res += "<!--- Figure in LaTeX\n"
    res += self.python_figure(code) + "\n"
    res += "--->\n"
    with open(fp, 'w+') as file:
        file.write(res)
def plot_training_history(training_history, loss_function="", show=True,
                          save=False):
    fig = plt.figure(figsize=(16, 6))

    loss_ax = fig.add_subplot(121)
    Print.data(list(training_history.history.keys()))
    loss_ax.plot(training_history.history['loss'], 'r', linewidth=3.0)
    loss_ax.plot(training_history.history['val_loss'], 'b', linewidth=3.0)
    loss_ax.legend(['Training Loss', 'Validation Loss'], fontsize=18)
    loss_ax.set_xlabel('Epochs', fontsize=16)
    loss_ax.set_ylabel('Loss', fontsize=16)
    loss_ax.set_title('Loss Curves: {}'.format(loss_function), fontsize=16)

    acc_ax = fig.add_subplot(122)
    acc_ax.plot(training_history.history['acc'], 'r', linewidth=3.0)
    acc_ax.plot(training_history.history['val_acc'], 'b', linewidth=3.0)
    acc_ax.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=18)
    acc_ax.set_xlabel('Epochs', fontsize=16)
    acc_ax.set_ylabel('Accuracy', fontsize=16)
    acc_ax.set_title('Accuracy Curves', fontsize=16)

    plt.tight_layout()
    if save:
        create_path_if_not_existing(Path.plots)
        fp = "/".join([Path.plots, save])
        plt.savefig(fp, format="png", dpi=400)
    if show:
        plt.show()
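# Usage sketch (hypothetical model and data). Note that Keras 2.3+ reports
# accuracy under the keys 'accuracy'/'val_accuracy' rather than 'acc'/'val_acc';
# on newer Keras versions the keys in plot_training_history must be adjusted.
#
#     history = model.fit(X_train, y_train, validation_split=0.2, epochs=50)
#     plot_training_history(history, loss_function="categorical_crossentropy",
#                           show=True, save="training_history.png")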
def run_multi(self):
    working_q = mp.Queue()
    output_q = mp.Queue()
    for i in range(len(self.datasets)):
        working_q.put(i)
    n_workers = np.min([mp.cpu_count(), self.cv_splits])
    print("")
    Print.info("Using {} workers".format(n_workers))
    processes = [
        mp.Process(target=self.worker,
                   args=(i, working_q, output_q, self.pipeline))
        for i in range(n_workers)
    ]
    for proc in processes:
        proc.start()
    for proc in processes:
        proc.join()
    while True:
        try:
            self.cv_reports.append(output_q.get_nowait())
        except Empty:
            break
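# Note on the pattern above: mp.Queue.get_nowait() raises queue.Empty (imported
# here as Empty) once every job has been claimed, so each worker exits cleanly
# without a sentinel value. The output queue is drained only after join();
# that is safe while the reports stay small, since a child process feeding a
# full mp.Queue pipe buffer will not terminate until the buffer is drained.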
def run_multi(self, ds_collection):
    working_q = mp.Queue()
    output_q = mp.Queue()
    for exp_params in self.exp_params_list:
        working_q.put(exp_params)
    n_workers = np.min([mp.cpu_count(), len(self.exp_params_list)])
    Print.info("Using {} workers".format(n_workers))
    processes = [
        mp.Process(target=self.worker,
                   args=(i, working_q, output_q, self.cv_splits,
                         ds_collection))
        for i in range(n_workers)
    ]
    for proc in processes:
        proc.start()
    for proc in processes:
        proc.join()
    while True:
        try:
            self.exp_reports.append(output_q.get_nowait())
        except Empty:
            break
@staticmethod
def upload_batch(time_frames):
    # Static: called as self.upload_batch(...) but uses no instance state.
    url = URL.timeframes
    r = requests.post(url, json=time_frames)
    if r.status_code != 201:
        Print.failure("Failed to upload batch")
        return False
    return True
def run(self):
    is_successful = self.upload_recording_buffer()
    if is_successful:
        if not self.keep_buffer:
            os.remove(self.buffer_path)
    else:
        Print.failure("Failed to upload recording buffer")
def stop_recording(self):
    if self.state == State.RECORDING:
        self.state = State.IDLE
        uploader = Uploader(sys_manager.session.id)
        uploader.start()
    else:
        Print.warning("Cannot stop recording: no recording in progress.")
def start_recording(self):
    if self.state == State.IDLE:
        sys_manager.session = Session.create()
        # create_path_if_not_existing('{}_{}'.format(Path.recording_buffer, sys_manager.session.id))
        self.state = State.RECORDING
    else:
        Print.warning("Already recording.")
def create_experiment_params(self):
    Print.point("Generating Experiments")
    # Fill in any grid keys the user did not specify explicitly.
    for key in param_grid.keys():
        if key not in self.params:
            self.params[key] = param_grid[key]
    exp_params_list = self.recurse_flatten(self.params)
    for params in exp_params_list:
        pipeline_items = params["preprocessor"].split(";")
        pipeline_items.append(params["classifier"])
        self.pipeline_items = list(set(self.pipeline_items + pipeline_items))
        # Conditional params only apply when their pipeline item is present.
        for key, val in conditional_param_grid.items():
            if key in pipeline_items:
                if isinstance(val, dict):
                    for val_key, val_val in val.items():
                        if key in self.params:
                            if val_key in self.params[key]:
                                params[key][val_key] = self.params[key][val_key]
                            else:
                                params[key][val_key] = val_val
                        else:
                            params[key] = val
                else:
                    params[key] = self.params[key] if key in self.params else val
            else:
                if key in params:
                    del params[key]
    exp_params_list = self.recurse_flatten(exp_params_list)
    # Remove duplicate configurations while preserving order.
    out = []
    for v in exp_params_list:
        if v not in out:
            out.append(v)
    exp_params_list = out
    # set_of_jsons = {json.dumps(d, sort_keys=True) for d in exp_params_list}
    # exp_params_list = [json.loads(t) for t in set_of_jsons]
    Print.start("")
    print(pd.DataFrame([flatten_dict(e) for e in exp_params_list]))
    print("\n\n")
    self.exp_params_list = exp_params_list
def plot_matrix(m, upscale_lowest_dim=True):
    if upscale_lowest_dim:
        min_ratio = 5
        h, w = np.shape(m)
        Print.pandas(m)
        # Repeat rows/columns so a very flat or narrow matrix stays legible.
        row_height = 1 if h >= w / min_ratio else int(w / (min_ratio * h))
        col_width = 1 if w >= h / min_ratio else int(h / (min_ratio * w))
        Print.data(row_height)
        Print.data(col_width)
        res = np.zeros([h * row_height, w * col_width])
        Print.data(np.shape(res))
        if row_height > col_width:
            for i, row in enumerate(m):
                res[i * row_height:(i + 1) * row_height, :] = np.tile(
                    row, (row_height, 1))
        elif col_width > row_height:
            for i, col in enumerate(m.T):
                res[:, i * col_width:(i + 1) * col_width] = np.tile(
                    col, (col_width, 1)).T
        else:
            res = m
    else:
        res = m
    cmap = plt.cm.Blues
    plt.imshow(res, interpolation='nearest', cmap=cmap)
    plt.colorbar()
    plt.show()
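# Minimal demo of the upscaling (assumes a matplotlib backend is configured):
# a 1x20 input gives row_height = int(20 / (5 * 1)) = 4, so the single row is
# rendered four pixels tall instead of as a barely visible line.
if __name__ == '__main__':
    plot_matrix(np.arange(20, dtype=float).reshape(1, 20))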
@classmethod
def full_dataset_gen(cls, window_length, count=1, sessions=None):
    if sessions is None:
        Print.info("Fetching sessions")
        sessions = Session.fetch_all(only_real=True, include_timeframes=True)
    for _ in range(count):
        dataset = Dataset.empty()
        for session in sessions:
            windows = list(session.window_gen(window_length=window_length))
            dataset = dataset + session.dataset(windows=windows)
        yield dataset
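# Usage sketch: materialize one dataset spanning all real sessions, as the
# DWT demo at the bottom of this section does:
#
#     ds = next(Session.full_dataset_gen(window_length=10))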
def worker(self, i, working_queue, output_q, pipeline):
    while True:
        try:
            ds_index = working_queue.get_nowait()
            ds = self.datasets[ds_index]
            Print.progress("Worker {} is doing a job".format(i))
            cv_report = self.run_cv(ds, pipeline)
            output_q.put(cv_report)
        except Empty:
            break
@staticmethod
def worker(i, working_queue, output_q, cv_splits, ds_collection):
    # Static: passed as a multiprocessing target and uses no instance state.
    while True:
        try:
            exp_params = working_queue.get_nowait()
            exp = Experiment.from_params(exp_params)
            exp.cv_splits = cv_splits
            exp.set_datasets(ds_collection)
            Print.progress("{}: Running Experiment".format(i))
            exp.run()
            output_q.put(exp.report)
        except Empty:
            Print.info("Queue Empty")
            break
def trim_none_seconds(self, sample_trim, return_copy=False):
    if not self.original_order:
        Print.warning("Skipped trim_none_seconds since dataset was not in "
                      "original order")
        if return_copy:
            return self.copy()
        return
    if isinstance(sample_trim, str):
        sample_trim = [float(t) for t in sample_trim.split(";")]
    relabel_seconds = sample_trim[0]
    remove_seconds = np.max(sample_trim)
    if remove_seconds == 0:
        if return_copy:
            return self.copy()
        return
    # Find the indices where the label changes, and the action label involved
    # in each change.
    change_points = []
    action_labels = []
    for i in range(1, self.length, 1):
        if self.y[i] != self.y[i - 1]:
            change_points.append(i)
            action_labels.append(np.max(self.y[i - 1:i + 1]))
    relabel_dist = int((SAMPLING_RATE * relabel_seconds) / self.window_length)
    remove_dist = int((SAMPLING_RATE * remove_seconds) / self.window_length)
    y_none_mask = self.y == 0
    relabel_mask = np.isin(self.range,
                           points_around(change_points, relabel_dist))
    relabel_mask = np.logical_and(relabel_mask, y_none_mask)
    remove_mask = np.isin(self.range,
                          points_around(change_points, remove_dist))
    remove_mask = np.logical_and(remove_mask, y_none_mask)
    remove_mask = np.logical_xor(remove_mask, relabel_mask)
    for i in self.range:
        if relabel_mask[i]:
            self.y[i] = action_labels[find_nearest(change_points, i,
                                                   return_index=True)]
    keep_mask = np.invert(remove_mask)
    if return_copy:
        return self.copy(keep_mask)
    else:
        self.apply_mask(keep_mask)
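# Worked example of the trim distances above (illustrative values, assuming
# SAMPLING_RATE = 250 Hz and self.window_length = 25 samples):
# sample_trim = "1;3" gives relabel_seconds = 1 and remove_seconds = 3, so
#     relabel_dist = int((250 * 1) / 25) = 10 windows
#     remove_dist  = int((250 * 3) / 25) = 30 windows
# None-labeled windows (y == 0) within 10 windows of a label change are
# relabeled to the adjacent action; the XOR then reduces remove_mask to the
# ring between 10 and 30 windows, which is dropped.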
@classmethod
def create(cls):
    url = URL.sessions
    payload = dict(
        person=sys_manager.person.id,
        ch_names=ch_names,
        is_real_data=sys_manager.is_real_data
    )
    r = requests.post(url, data=payload)
    json_resp = r.json()
    obj = cls(**json_resp)
    if r.status_code == 201:
        Print.api("Created New Session ({})".format(obj.id))
    else:
        Print.failure("Failed to create session (status {})".format(
            r.status_code))
    return obj
def split_random(self, include_val=False):
    if self.is_child:
        raise Print.build_except("Tried to split a child dataset.", self)
    split_ratio = np.asarray(
        self.split_ratio if include_val else [self.split_ratio[1]])
    # Convert cumulative ratios to index boundaries for np.split.
    splits = (self.length * split_ratio).astype(int)
    p = np.random.permutation(self.length)
    p_parts = np.split(p, splits)
    return [self.child_from_mask(part) for part in p_parts]
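# Split arithmetic sketch: with self.length = 100 and (assumed)
# self.split_ratio = [0.6, 0.8], include_val=True gives split indices
# [60, 80] and three children (train/val/test of 60/20/20 samples), while
# include_val=False keeps only [80] and yields an 80/20 train/test split.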
def upload_recording_buffer(self):
    all_successful = True
    time_frames = list()
    line_count = sum(1 for _ in open(self.buffer_path))
    ch_count = len(ch_names)
    with open(self.buffer_path) as infile:
        for i, line in enumerate(tqdm(infile, total=line_count)):
            new_time_frame = TimeFrame.from_line(line)
            if not len(new_time_frame.sensor_data) == ch_count:
                Print.warning("Skipped TimeFrame with {} data points".format(
                    len(new_time_frame.sensor_data)))
                Print.data(new_time_frame.sensor_data)
                continue
            time_frames.append(new_time_frame.to_json())
            if (i + 1) % batch_size == 0:
                if not self.upload_batch(time_frames):
                    all_successful = False
                time_frames = list()
    # Flush the final partial batch and count its result as well.
    if time_frames and not self.upload_batch(time_frames):
        all_successful = False
    return all_successful
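# Batching sketch (assuming batch_size = 500): frames are flushed after every
# 500th buffer line ((i + 1) % 500 == 0), and the final call after the loop
# uploads whatever partial batch remains, so no trailing frames are lost.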
def run_queue():
    fns = os.listdir(path)
    # Skip queue entries already marked as done; run highest priority first.
    fns = [fn for fn in fns if "done" not in fn]
    fns.sort(key=pri_from_fn, reverse=True)
    fps = ["/".join([path, fn]) for fn in fns]
    for fp in fps:
        print("\n\n")
        Print.start("Running ExperimentSet ({})".format(
            name_from_fn(fp.split("/")[-1])))
        with open(fp, "r") as infile:
            json_data = json.load(infile)
        exp_set = ExperimentSet(cv_splits=8, **json_data)
        exp_set.multiprocessing = "cv"
        exp_set.run_experiments()
        # Mark the file as done so it is skipped on the next run.
        tokens = fp.split(".")
        tokens[0] += "_done"
        new_fp = ".".join(tokens)
        os.rename(fp, new_fp)
def run_experiments(self, fast_datasets=False):
    time.sleep(1)
    start_run_time = time.time()
    ds_collection = DatasetCollection.from_params(
        self.params, self.cv_splits, fast=fast_datasets)
    if self.multiprocessing == "exp":
        self.run_multi(ds_collection)
    else:
        for i, exp_params in enumerate(
                tqdm(self.exp_params_list, desc="Running Experiments")):
            exp = Experiment.from_params(exp_params)
            exp.cv_splits = self.cv_splits
            exp.index = i
            exp.set_datasets(ds_collection)
            exp.multiprocessing = (self.multiprocessing == "cv")
            exp.run()
            if (self.best_exp is None or
                    exp.report["accuracy"] > self.best_exp.report["accuracy"]):
                Print.good("New best: {}".format(
                    np.round(exp.report["accuracy"], 3)))
                self.best_exp = exp
            self.exp_reports.append(exp.report)
    self.run_time = time.time() - start_run_time
    self.generate_report()
    if self.save_best:
        # Note: sklearn.externals.joblib was removed in scikit-learn 0.23;
        # on newer versions, import the standalone joblib package instead.
        from sklearn.externals import joblib
        fp = Path.classifiers + '/' + "classifier1.pkl"
        joblib.dump(self.best_exp.pipeline, fp)
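# Loading sketch for the persisted pipeline. On scikit-learn >= 0.23 the
# bundled joblib is gone; the standalone package is the drop-in replacement:
#
#     import joblib
#     clf = joblib.load(Path.classifiers + "/classifier1.pkl")
#     y_pred = clf.predict(X_new)  # X_new: hypothetical feature matrix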
@classmethod
def create_or_fetch(cls, name, age, gender):
    url = URL.persons
    r = requests.post(url, data=dict(name=name, age=age, gender=gender))
    print(r)
    json_resp = r.json()
    Print.json(json_resp)
    person = cls(**json_resp)
    if r.status_code == 201:
        Print.api("Created New Person: {}".format(person))
    elif r.status_code == 200:
        Print.api("Fetched Existing Person: {}".format(person))
    return person
def create_classifier(self):
    if self.datasets is None:
        Print.info("Fetching dataset")
        self.datasets = list()
        ds = Session.full_dataset(window_length=self.window_length)
        ds = ds.reduced_dataset(self.dataset_type)
        ds = ds.normalize()
        ds.shuffle()
        self.datasets.append(ds)
    pipeline = self.create_pipeline()
    Print.data(pipeline)
    ds = self.datasets[0]
    ds_train, ds_test = ds.split_random()
    pipeline.fit(ds_train.X, ds_train.y)
    accuracy = pipeline.score(ds_test.X, ds_test.y)
    Print.info("Accuracy: {}".format(accuracy))
    return pipeline
def run(self):
    print("\n\n")
    Print.time("Running Experiment {}".format(
        "" if self.index is None else self.index))
    start_time = time.time()
    try:
        if self.datasets is None:
            self.datasets = list()
            for i in tqdm(range(self.cv_splits), desc="Fetching Datasets"):
                ds = Session.full_dataset(window_length=self.window_length)
                ds = ds.reduced_dataset(self.dataset_type)
                ds = ds.normalize()
                ds.shuffle()
                self.datasets.append(ds)
        if self.multiprocessing:
            self.run_multi()
        else:
            for ds in tqdm(self.datasets, desc="Cross validating"):
                self.cv_reports.append(self.run_cv(ds))
        self.report["success"] = True
    except Exception as e:
        print("")
        Print.warning("Skipping experiment: {}".format(e))
        Print.ex(e)
        self.report["success"] = False
        return
    self.report = {**self.report, **avg_dict(self.cv_reports)}
    self.report["confusion_matrix"] = np.sum(
        [r["confusion_matrix"] for r in self.cv_reports], 0)
    self.report["time"]["exp"] = time.time() - start_time
    self.report["accuracies"] = [r["accuracy"] for r in self.cv_reports]
    self.report["cv_splits"] = self.cv_splits
    # self.report["feature_vector_length"] = self.feature_vector_length()
    self.report["dataset_lengths"] = [d.length for d in self.datasets]
def generate_overview(self):
    fn = self.filename("exp_set_overview", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])
    relevant_keys = list(set(self.exp_set.relevant_keys))
    Print.data(relevant_keys)
    exp_summary = np.empty(
        shape=[len(self.exp_reports), 3 + len(relevant_keys)], dtype="U25")
    res = "# Experiment Set Overview\n"
    res += "## Performance by relevant params\n\n"
    param_performances = {
        param: self.param_performance(param)
        for param in self.all_relevant_params()
    }
    for param_name, param_vals in param_performances.items():
        res += "### {}\n\n".format(param_name)
        param_vals_list = sorted(
            list(param_vals.items()), key=lambda x: x[1], reverse=True)
        res += "\n".join([
            "* **{}:** {}".format(e[0], np.round(e[1], DECIMALS))
            for e in param_vals_list
        ])
        res += "\n\n"
    res += "\n\n"
    res += "## Performance Overview\n\n"
    for i, exp_report in enumerate(self.exp_reports):
        flat_params = flatten_dict(exp_report["raw_params"])
        relevant_params = np.empty(shape=[len(relevant_keys)], dtype="U25")
        for j, key in enumerate(relevant_keys):
            if key in flat_params:
                relevant_params[j] = flat_params[key]
            else:
                relevant_params[j] = "-"
        acc_string = "{}%".format(np.round(100 * exp_report["accuracy"], 1))
        kappa_string = "{}".format(np.round(exp_report["kappa"], 3))
        time_string = "{}s".format(np.round(exp_report["time"]["exp"], 2))
        exp_summary[i, :3] = [acc_string, kappa_string, time_string]
        exp_summary[i, 3:] = relevant_params
    df_perf1 = pd.DataFrame(
        exp_summary,
        columns=["Accuracy", "Kappa", "Avg Time"] + relevant_keys,
        copy=True)
    df_perf1.sort_values(by=["Accuracy"], axis=0, ascending=False,
                         inplace=True)
    res += tabulate(
        df_perf1, tablefmt="pipe", headers="keys", showindex=False) + "\n"
    res += "<!---\nResults in LaTeX\n"
    res += tabulate(
        df_perf1, tablefmt="latex", headers="keys", showindex=False) + "\n"
    res += "--->\n"
    with open(fp, 'w+') as file:
        file.write(res)
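# tabulate(..., tablefmt="pipe", headers="keys") emits a GitHub-flavored
# Markdown table, e.g. (illustrative values):
#
#     | Accuracy | Kappa | Avg Time |
#     |:---------|:------|:---------|
#     | 87.5%    | 0.812 | 12.3s    |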
if __name__ == '__main__':
    Print.start("Starting")
    sessions = Session.fetch_all()
    session = random.choice(sessions)
    n_channels = len(session.ch_names)
    session.fetch_timeframes()
    X = session.timeframes[:, :n_channels]
    y = session.timeframes[:, n_channels + 1]
    Print.data(np.mean(y))
    X_pow = X**2
    res = np.zeros([len(y), n_channels + 1])
def exit_system(self):
    self.streamhandler.stop()
    time.sleep(1)
    Print.success("Successfully exited program.")
    sys.exit()
def generate_report(self):
    print("\n")
    Print.success("Generating Report")
    report = Report(self, self.exp_reports)
    report.generate()
def generate_detail(self):
    fn = self.filename("exp_set_detail", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])
    relevant_keys = list(set(self.exp_set.relevant_keys))
    res = "# Experiment Set Detail\n"
    res += "{}\n\n".format(datestamp_str(self.exp_set.init_time))
    res += "* **Runtime:** {}s\n".format(np.round(self.exp_set.run_time, 1))
    res += "* **Multiprocessing:** {}\n".format(self.exp_set.multiprocessing)
    res += "\n\n"
    if self.exp_set.description:
        res += "#### Description\n"
        res += self.exp_set.description + "\n"
    if self.exp_set.hypothesis:
        res += "#### Hypothesis\n"
        res += self.exp_set.hypothesis + "\n"
    res += "\n\n"
    res += "## Performance by configuration\n\n"
    for i, exp_report in enumerate(self.exp_reports):
        flat_params = flatten_dict(exp_report["raw_params"])
        res += "---\n\n"
        res += "### Entry {} accuracy: {}\n".format(
            i + 1, np.round(exp_report["accuracy"], DECIMALS))
        res += "* **Kappa:** {}\n".format(
            np.round(exp_report["kappa"], DECIMALS))
        res += "* **Average Experiment Time:** {}s\n".format(
            np.round(exp_report["time"]["exp"], 2))
        res += "* **Dataset type:** {}\n".format(exp_report["dataset_type"])
        res += "* **Dataset avg length:** {}\n".format(
            np.round(np.mean(exp_report["dataset_lengths"]), DECIMALS))
        # res += "* **Feature Vector Length:** {}\n".format(exp_report["feature_vector_length"])
        res += "* **CV Splits:** {}\n".format(exp_report["cv_splits"])
        res += "\n"
        res += "{}\n".format(np.round(exp_report["accuracies"], DECIMALS))
        res += "### Config\n"
        res += "**Relevant Parameters**\n\n"
        relevant_params = {
            key: flat_params[key]
            for key in relevant_keys if key in flat_params
        }
        params_df = pd.DataFrame([relevant_params])
        res += tabulate(
            params_df, tablefmt="pipe", headers="keys",
            showindex=False) + "\n"
        res += "**All Parameters**\n\n"
        params_df = pd.DataFrame([flat_params])
        res += tabulate(params_df.round(DECIMALS), tablefmt="pipe",
                        headers="keys", showindex=False) + "\n"
        res += "### Details\n"
        res += "**Confusion Matrix**\n\n"
        c_matrix = exp_report["confusion_matrix"]
        class_names = exp_report["dataset_type"].labels
        c_matrix_df = pd.DataFrame(
            c_matrix,
            columns=["Pred: {}".format(label) for label in class_names],
            index=["__True: {}__".format(label) for label in class_names])
        res += tabulate(
            c_matrix_df, tablefmt="pipe", headers="keys",
            showindex=True) + "\n"
        res += "<!---\nConfusion Matrix in LaTeX\n"
        res += tabulate(
            c_matrix_df, tablefmt="latex", headers="keys",
            showindex=False) + "\n"
        res += "--->\n"
        # Confusion matrix as raw numpy arrays, for copy-paste reproduction.
        res += "<!---\nConfusion Matrix Raw\n"
        res += "c_matrix = np.array({})\n".format(format_array(c_matrix))
        res += "class_names = {}\n".format(format_array(class_names))
        res += "--->\n"
        # res += "**Report**\n\n"
        # report = exp_report["report"]
        # report_df = pd.DataFrame.from_dict(report)
        # report_key = list(report.keys())[0]
        # index = ["__{}__".format(key) for key in report[report_key].keys()]
        # res += tabulate(report_df.round(DECIMALS), tablefmt="pipe", headers="keys", showindex=index) + "\n"
        res += "**Time**\n\n"
        time_df = pd.DataFrame([exp_report["time"]])
        res += tabulate(time_df.round(DECIMALS), tablefmt="pipe",
                        headers="keys", showindex=False) + "\n"
    with open(fp, 'w+') as file:
        file.write(res)
        # (Tail of the DWT transform: stack the approximation and detail
        # coefficient bands into one feature block.)
        res = np.vstack((cA, cH, cV, cD))
        self.sample_shape = np.shape(res)
        return self


if __name__ == '__main__':
    ds = list(Session.full_dataset_gen(window_length=10))[0]
    # Find the first sample index for each of the two classes.
    indices = []
    for class_idx in [0, 1]:
        for i, y in enumerate(ds.y):
            if y == class_idx:
                indices.append(i)
                break
    print(indices)
    wavelet = DWT(dim=1, wavelet='db1')
    wavelet.fit(ds.X)
    for i in indices:
        sample = ds.X[i]
        sample_t = wavelet._transform_sample(sample)
        Print.pandas(sample)
        Print.pandas(sample_t)