def get_trace(self, idx, await_labels=False):
    """Return one trace together with its predicted labels as a DataFrame.

    The trace is read from the traces store; the prediction is then polled
    from the prediction store until a non-empty array is obtained.

    :param idx: name of the trace; looked up as '/traces/<idx>' in the traces
        store and as '/<idx>' in the prediction store
    :param await_labels: if True, a missing prediction counts as empty, so the
        call keeps polling until one is stored; if False, a missing prediction
        is replaced by zeros, one per column of the trace array
    :return: DataFrame of the trace rows stacked with the label row,
        transposed, with columns ``colnames_alex_w_labels``
    """
    with SafeH5(self.traces_store_fn, 'r') as trace_file:
        trace = trace_file['/traces/' + idx][()]

    # Stand-in used when the prediction store has no entry for this trace.
    if await_labels:
        fallback = np.array([])
    else:
        fallback = np.zeros(trace.shape[1])

    prediction_key = '/' + idx
    while True:
        # The store is reopened on every pass and closed before the check.
        with SafeH5(self.predict_store_fn, 'r') as prediction_file:
            labels = prediction_file.get(prediction_key, fallback)[()]
        if len(labels) > 0:
            break
        # TODO: somehow signal that this example has to be classified fast.
        # NOTE(review): there is no pause between polls.

    stacked = np.vstack((trace, labels))
    return pd.DataFrame(data=stacked.T, columns=colnames_alex_w_labels)
def accuracy(self):
    """Return per-trace accuracy values and the mean accuracy over the dataset.

    Only traces that have both an entry in ``self.label_dict`` and a stored
    prediction are scored; labelled traces without a prediction are skipped.

    :return: tuple ``(per_trace, overall)``: an array of accuracy percentages,
        one per scored trace, and the percentage of correctly predicted points
        over all scored traces. When nothing can be scored (no labels, or no
        labelled trace has a prediction yet) the result is
        ``(np.array([np.nan]), np.nan)``.
    """
    if not len(self.label_dict):
        return np.array([np.nan], dtype=float), np.nan

    # Look each dataset up first and only read it when present: subscripting
    # the ``None`` default would raise TypeError. No blanket ``except`` here;
    # swallowing errors only replaced them with an unbound ``pred_dict``.
    pred_dict = {}
    with SafeH5(self.predict_store_fn, 'r') as fh:
        for idx in self.label_dict:
            dataset = fh.get('/' + idx)
            pred_dict[idx] = None if dataset is None else dataset[()]

    scored = [idx for idx in self.label_dict if pred_dict[idx] is not None]
    if not scored:
        # Avoid a 0/0 division; mirror the "no labels" placeholder result.
        return np.array([np.nan], dtype=float), np.nan

    nb_correct = np.array(
        [np.sum(self.label_dict[idx] == pred_dict[idx]) for idx in scored])
    nb_points = np.array([len(self.label_dict[idx]) for idx in scored])
    return (nb_correct / nb_points * 100,
            nb_correct.sum() / nb_points.sum() * 100)
def init_table(self):
    """Create the empty tables and HDF5 files, then start the file parser.

    Steps, in order:

    * build the empty ``self.index_table`` and ``self.manual_table``, both
      indexed by 'trace';
    * store the index table under 'index_table' in the traces store and
      create the 'traces' group there;
    * refresh ``self.data_timestamp`` and write it, together with the current
      parameter values, as attributes of the file at ``self.toparse_fn``;
    * open the prediction store in 'w' mode;
    * start a ``FileParser`` process.

    :return: the started ``Process`` object running ``FileParser``
    """
    # Create index table
    self.index_table = pd.DataFrame(columns=[
        'trace', 'eps', 'l', 'd', 'gamma', 'data_timestamp', 'logprob',
        'mod_timestamp'
    ]).set_index('trace')

    # Builtin ``bool`` rather than ``np.bool``: that alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    self.manual_table = df_empty(
        columns=['trace', 'is_labeled', 'is_junk'],
        dtypes=[str, bool, bool]).set_index('trace')

    with SafeHDFStore(self.traces_store_fn, 'a') as fh:
        fh.put('index_table', value=self.index_table, format='table',
               append=True)

    # make traces group
    with SafeH5(self.traces_store_fn, 'a') as fh:
        fh.create_group('traces')

    # hdf5 file for transfer to file parser
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'w') as fh:
        # All values are read before the first attribute is written.
        parser_attrs = {
            'data_timestamp': self.data_timestamp,
            'framerate': self.framerate,
            'eps': self.eps,
            'l': self.l,
            'd': self.d,
            'gamma': self.gamma,
            'alex': self.alex,
            'traceswitch': self.traceswitch,
        }
        for attr_name, attr_value in parser_attrs.items():
            fh.attrs[attr_name] = attr_value

    # hdf5 file for transfer to predictor; nothing is written here
    # (presumably 'w' mode just creates/empties the file; confirm in SafeH5).
    with SafeH5(self.predict_store_fn, 'w') as fh:
        pass

    fp_process = Process(target=FileParser,
                         args=(self.toparse_fn, self.traces_store_fn,
                               self.main_process),
                         name='file_parser')
    fp_process.start()
    return fp_process
def run(self):
    """Repeatedly classify stored traces until ``killer.kill_now`` is set.

    Each pass calls ``self.check_mod_update()`` and then starts over if
    ``self.classifier.trained`` is None, if the traces store holds no
    'index_table', or if no row's ``mod_timestamp`` differs from
    ``self.classifier.timestamp``. Otherwise at most ``self.chunk_size`` of
    the differing traces are run through ``self.classifier.predict``; the
    resulting state sequences are written to the prediction store and the
    updated rows replace the old ones in 'index_table'.

    Does not return normally: ends with ``sys.exit(0)`` once the loop stops.
    """
    killer = GracefulKiller()
    # NOTE(review): the `continue` branches below retry immediately; unless
    # check_mod_update() blocks, this loop polls without any pause.
    while not killer.kill_now:
        self.check_mod_update()
        if self.classifier.trained is None:
            continue
        # Check for data to predict
        with SafeHDFStore(self.traces_store_fn) as fh:
            if 'index_table' in fh:
                index_table = fh.get('index_table')
            else:
                index_table = None
        if index_table is None:
            continue
        # Traces whose stored timestamp differs from the classifier's,
        # limited to one chunk per pass.
        pred_idx = index_table.index[
            index_table.mod_timestamp != self.classifier.timestamp][:self.chunk_size]
        if not len(pred_idx):
            continue
        # From here on `index_table` holds only the rows being updated.
        index_table = index_table.loc[pred_idx, :]
        # predict
        state_seq_dict = {}
        for idx in pred_idx:
            trace_df = self.get_trace(idx)
            # predict() yields two values: the state sequence and the value
            # stored in the row's 'logprob' column.
            state_seq_dict[idx], index_table.loc[
                idx, 'logprob'] = self.classifier.predict(trace_df)
        # Mark the selected rows with the classifier's timestamp.
        index_table.mod_timestamp = self.classifier.timestamp
        # Save new predictions
        with SafeH5(self.predict_store_fn, 'a') as fh:
            for idx in state_seq_dict:
                if idx in fh:
                    # Existing dataset: overwrite its contents in place.
                    fh[idx][:] = state_seq_dict[idx]
                else:
                    fh[idx] = state_seq_dict[idx]
        with SafeHDFStore(self.traces_store_fn) as fh:
            # NOTE(review): the where-string names the local variable
            # `index_table`; presumably it is resolved from this scope, so
            # renaming the local would break the query -- confirm.
            fh.remove('index_table', where='index in index_table.index')  # todo check
            fh.append('index_table', index_table, append=True, data_columns=True)
    sys.exit(0)
def d(self, d):
    """Set ``d`` and publish it to the to-parse HDF5 file.

    ``self.data_timestamp`` is refreshed first; the new value and the
    timestamp are then written as the file attributes 'd' and
    'data_timestamp', and the value is cached in ``self._d``.

    :param d: new value for the 'd' attribute
    """
    # NOTE(review): presumably a property setter; decorator not visible here.
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        file_attrs = parse_file.attrs
        file_attrs['d'] = d
        file_attrs['data_timestamp'] = self.data_timestamp
    self._d = d
def l(self, l):
    """Set ``l`` and publish it to the to-parse HDF5 file.

    ``self.data_timestamp`` is refreshed first; the new value and the
    timestamp are then written as the file attributes 'l' and
    'data_timestamp', and the value is cached in ``self._l``.

    :param l: new value for the 'l' attribute
    """
    # NOTE(review): presumably a property setter; decorator not visible here.
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        file_attrs = parse_file.attrs
        file_attrs['l'] = l
        file_attrs['data_timestamp'] = self.data_timestamp
    self._l = l
def eps(self, eps):
    """Set ``eps`` and publish it to the to-parse HDF5 file.

    ``self.data_timestamp`` is refreshed first; the new value and the
    timestamp are then written as the file attributes 'eps' and
    'data_timestamp', and the value is cached in ``self._eps``.

    :param eps: new value for the 'eps' attribute
    """
    # NOTE(review): presumably a property setter; decorator not visible here.
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        file_attrs = parse_file.attrs
        file_attrs['eps'] = eps
        file_attrs['data_timestamp'] = self.data_timestamp
    self._eps = eps
def traceswitch(self, traceswitch):
    """Set ``traceswitch`` and publish it to the to-parse HDF5 file.

    ``self.data_timestamp`` is refreshed first; the new value and the
    timestamp are then written as the file attributes 'traceswitch' and
    'data_timestamp', and the value is cached in ``self._traceswitch``.

    :param traceswitch: new value for the 'traceswitch' attribute
    """
    # NOTE(review): presumably a property setter; decorator not visible here.
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        file_attrs = parse_file.attrs
        file_attrs['traceswitch'] = traceswitch
        file_attrs['data_timestamp'] = self.data_timestamp
    self._traceswitch = traceswitch
def alex(self, alex):
    """Set ``alex`` and publish it to the to-parse HDF5 file.

    ``self.data_timestamp`` is refreshed first; the new value and the
    timestamp are then written as the file attributes 'alex' and
    'data_timestamp', and the value is cached in ``self._alex``.

    :param alex: new value for the 'alex' attribute
    """
    # NOTE(review): presumably a property setter; decorator not visible here.
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        file_attrs = parse_file.attrs
        file_attrs['alex'] = alex
        file_attrs['data_timestamp'] = self.data_timestamp
    self._alex = alex
def framerate(self, framerate):
    """Set ``framerate`` and publish it to the to-parse HDF5 file.

    ``self.data_timestamp`` is refreshed first; the new value and the
    timestamp are then written as the file attributes 'framerate' and
    'data_timestamp', and the value is cached in ``self._framerate``.

    :param framerate: new value for the 'framerate' attribute
    """
    # NOTE(review): presumably a property setter; decorator not visible here.
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        file_attrs = parse_file.attrs
        file_attrs['framerate'] = framerate
        file_attrs['data_timestamp'] = self.data_timestamp
    self._framerate = framerate
def add_tuple(self, content, fn):
    """Decode an uploaded file and store its bytes in the to-parse HDF5 file.

    :param content: string of the form '<prefix>,<base64 data>'; everything
        up to the first comma is discarded
    :param fn: name under which the decoded bytes are stored in the file
    """
    # Drop the prefix that JS adds in front of the base64 payload.
    _prefix, encoded = content.split(",", 1)
    raw_bytes = base64.b64decode(encoded)
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        # The decoded bytes are wrapped in np.void before being stored.
        parse_file[fn] = np.void(raw_bytes)
def gamma(self, gamma):
    """Set ``gamma`` and publish it to the to-parse HDF5 file.

    ``self.data_timestamp`` is refreshed first; the new value and the
    timestamp are then written as the file attributes 'gamma' and
    'data_timestamp', and the value is cached in ``self._gamma``.

    :param gamma: new value for the 'gamma' attribute
    """
    # NOTE(review): presumably a property setter; decorator not visible here.
    self.data_timestamp = numeric_timestamp()
    with SafeH5(self.toparse_fn, 'a') as parse_file:
        file_attrs = parse_file.attrs
        file_attrs['gamma'] = gamma
        file_attrs['data_timestamp'] = self.data_timestamp
    self._gamma = gamma
def get_trace(self, idx):
    """Load one stored trace as a DataFrame with matching column names.

    :param idx: name of the trace dataset under '/traces/' in the traces store
    :return: DataFrame built from the transposed dataset; its columns are
        ``colnames`` when the dataset has 10 rows and ``colnames_alex``
        otherwise
    """
    with SafeH5(self.traces_store_fn, 'r') as store:
        raw = store['/traces/' + idx][()]

    # The row count decides which set of column names applies.
    if raw.shape[0] == 10:
        column_labels = colnames
    else:
        column_labels = colnames_alex
    return pd.DataFrame(data=raw.T, columns=column_labels)