def acquire_samples(self, max_samples=0):
    """ Takes one CPU utilization sample, timestamped with NTP64 time.
    Requires psutil; returns a dict of column names and one data row. """
    sample = [NTP4Time.utcnow().to_ntp64(), psutil.cpu_percent()]
    sample_desc = dict(cols=["time", "cpu_percent"], data=[sample])
    return sample_desc
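# Background sketch, not part of the source: an NTP64 timestamp (the value
# NTP4Time.utcnow().to_ntp64() produces above) packs whole seconds since
# 1900-01-01 into the high 32 bits and a binary fraction of a second into the
# low 32 bits. The standalone helper below (name ours) illustrates that bit
# layout; it is not NTP4Time's actual implementation.
import time

NTP_UNIX_OFFSET = 2208988800  # seconds from the NTP epoch (1900) to the Unix epoch (1970)

def unix_to_ntp64(unix_ts):
    seconds = int(unix_ts) + NTP_UNIX_OFFSET    # high 32 bits: whole seconds
    fraction = int((unix_ts % 1.0) * 2 ** 32)   # low 32 bits: fractional second
    return (seconds << 32) | fraction

# Example: unix_to_ntp64(time.time()) yields a single 64-bit integer timestamp.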
def acquire_samples(self, max_samples=0):
    """ Takes one sample of two sine waves plus a random value, timestamped
    with NTP64 time. Requires the math, random and time modules. """
    ts = time.time()
    sample = [NTP4Time.utcnow().to_ntp64(),
              20 * math.sin(10 * ts) + 5,
              10 * math.sin(15 * ts) + 10,
              random.random() * 100]
    sample_desc = dict(cols=["time", "wave1", "wave2", "random1"], data=[sample])
    return sample_desc
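# Usage sketch with invented names: both acquire_samples() variants above share
# one return contract -- a dict with parallel "cols" and "data" keys, where
# "data" holds one row (a list of values) per sample. A hypothetical polling
# harness can batch several samples into a single packet-shaped dict like the
# one extend_dataset() below consumes:
import time

def collect_samples(sampler, num_polls, interval=1.0):
    batch = None
    for _ in xrange(num_polls):
        desc = sampler.acquire_samples()   # one row per call, per the contract above
        if batch is None:
            batch = dict(cols=desc["cols"], data=[])
        batch["data"].extend(desc["data"])
        time.sleep(interval)
    return batch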
def extend_dataset(self, packet):
    """ Adds values from a data packet to the dataset and updates indexes and metadata """
    ingest_ts = NTP4Time.utcnow()
    num_rows, cur_idx = len(packet.data["data"]), 0
    ds_filename = self._get_ds_filename()
    data_file = HDFLockingFile(ds_filename, "r+", retry_count=10, retry_wait=0.5)
    file_closed = False
    try:
        if self.ds_layout == DS_LAYOUT_INDIVIDUAL:
            # Get index values from time var
            if self.time_var not in packet.data["cols"]:
                raise BadRequest("Packet has no time")
            var_ds = data_file["vars/%s" % self.time_var]
            cur_size, cur_idx = len(var_ds), var_ds.attrs["cur_row"]
            var_ds.attrs["cur_row"] += num_rows

            # Fill variables with values from packet or NaN
            for var_name in self.var_defs_map.keys():
                var_ds = data_file["vars/%s" % var_name]
                if cur_idx + num_rows > cur_size:
                    self._resize_dataset(var_ds, num_rows)
                if var_name in packet.data["cols"]:
                    data_slice = packet.data["data"][:][var_name]
                    var_ds[cur_idx:cur_idx + num_rows] = data_slice
                else:
                    # Leave the initial fill value (zeros)
                    #var_ds[cur_idx:cur_idx+num_rows] = [None]*num_rows
                    pass

            extra_vars = set(packet.data["cols"]) - set(self.var_defs_map.keys())
            if extra_vars:
                log.warn("Data packet had extra vars not in dataset: %s", extra_vars)

        elif self.ds_layout == DS_LAYOUT_COMBINED:
            var_ds = data_file["vars/%s" % DS_VARIABLES]
            cur_size, cur_idx = len(var_ds), var_ds.attrs["cur_row"]
            if cur_idx + num_rows > cur_size:
                self._resize_dataset(var_ds, num_rows)
            ds_var_names = [var_info["name"] for var_info in self.var_defs]
            pvi = {col_name: col_idx for col_idx, col_name in enumerate(packet.data["cols"])
                   if col_name in ds_var_names}
            for row_idx in xrange(num_rows):
                row_data = packet.data["data"][row_idx]
                row_vals = tuple(row_data[vn] if vn in pvi else None for vn in ds_var_names)
                var_ds[cur_idx + row_idx] = row_vals
            var_ds.attrs["cur_row"] += num_rows

        # Update time_ingest (ts, begin row, count)
        ds_tingest = data_file[DS_TIMEINGEST_PATH]
        if ds_tingest.attrs["cur_row"] + 1 > len(ds_tingest):
            self._resize_dataset(ds_tingest, 1, INTERNAL_ROW_INCREMENT)
        ds_tingest[ds_tingest.attrs["cur_row"]] = (ingest_ts.to_np_value(), cur_idx, num_rows)
        ds_tingest.attrs["cur_row"] += 1

        # Update time index
        self._update_time_index(data_file, num_rows, cur_idx=cur_idx)

        # Check if pruning is necessary
        if self.prune_trigger_mode == "on_ingest" and self.prune_mode:
            file_closed = self._prune_dataset(data_file)

        #HDF5Tools.dump_hdf5(data_file, with_data=True)
    except Exception:
        log.exception("Error extending dataset %s HDF5 file" % self.dataset_id)
        raise
    finally:
        if not file_closed:
            data_file.close()
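# Read-back sketch under stated assumptions: HDFLockingFile wraps an HDF5 file,
# so plain h5py (assumed here) can read the "vars/<name>" layout that
# extend_dataset() writes. Rows past the cur_row attribute are preallocated
# fill values and are skipped. The function name is ours, not from the source.
import h5py

def read_var_values(ds_filename, var_name):
    with h5py.File(ds_filename, "r") as data_file:
        var_ds = data_file["vars/%s" % var_name]
        num_valid = var_ds.attrs["cur_row"]   # rows actually written so far
        return var_ds[:num_valid]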
def extend_dataset(self, packet):
    """ Adds values from a data packet to the dataset and updates indexes and metadata """
    ingest_ts = NTP4Time.utcnow()
    num_rows, cur_idx, time_idx_rows = len(packet.data["data"]), 0, []
    ds_filename = self._get_ds_filename()
    data_file = HDFLockingFile(ds_filename, "r+", retry_count=10, retry_wait=0.5)
    try:
        if self.ds_layout == DS_LAYOUT_INDIVIDUAL:
            # Get index values from time var
            if self.time_var not in packet.data["cols"]:
                raise BadRequest("Packet has no time")
            var_ds = data_file["vars/%s" % self.time_var]
            cur_size, cur_idx = len(var_ds), var_ds.attrs["cur_row"]
            var_ds.attrs["cur_row"] += num_rows

            # Fill variables with values from packet or NaN
            for var_name in self.var_defs_map.keys():
                var_ds = data_file["vars/%s" % var_name]
                if cur_idx + num_rows > cur_size:
                    self._resize_dataset(var_ds, num_rows)
                if var_name in packet.data["cols"]:
                    data_slice = packet.data["data"][:][var_name]
                    var_ds[cur_idx:cur_idx + num_rows] = data_slice
                else:
                    # Leave the initial fill value (zeros)
                    #var_ds[cur_idx:cur_idx+num_rows] = [None]*num_rows
                    pass

            extra_vars = set(packet.data["cols"]) - set(self.var_defs_map.keys())
            if extra_vars:
                log.warn("Data packet had extra vars not in dataset: %s", extra_vars)

        elif self.ds_layout == DS_LAYOUT_COMBINED:
            var_ds = data_file["vars/%s" % DS_VARIABLES]
            cur_size, cur_idx = len(var_ds), var_ds.attrs["cur_row"]
            if cur_idx + num_rows > cur_size:
                self._resize_dataset(var_ds, num_rows)
            ds_var_names = [var_info["name"] for var_info in self.var_defs]
            pvi = {col_name: col_idx for col_idx, col_name in enumerate(packet.data["cols"])
                   if col_name in ds_var_names}
            for row_idx in xrange(num_rows):
                row_data = packet.data["data"][row_idx]
                row_vals = tuple(row_data[vn] if vn in pvi else None for vn in ds_var_names)
                var_ds[cur_idx + row_idx] = row_vals
            var_ds.attrs["cur_row"] += num_rows

        # Update time_ingest (ts, begin row, count)
        ds_tingest = data_file[DS_TIMEINGEST_PATH]
        if ds_tingest.attrs["cur_row"] + 1 > len(ds_tingest):
            self._resize_dataset(ds_tingest, 1, INTERNAL_ROW_INCREMENT)
        ds_tingest[ds_tingest.attrs["cur_row"]] = (ingest_ts.to_np_value(), cur_idx, num_rows)
        ds_tingest.attrs["cur_row"] += 1

        # Update time_idx (every nth row's time); ceiling division via
        # Python 2 integer "/"
        new_idx_row = (cur_idx + num_rows + self.time_idx_step - 1) / self.time_idx_step
        old_idx_row = (cur_idx + self.time_idx_step - 1) / self.time_idx_step
        num_tidx_rows = new_idx_row - old_idx_row
        time_ds = data_file["vars/%s" % (self.time_var if self.ds_layout == DS_LAYOUT_INDIVIDUAL
                                         else DS_VARIABLES)]
        time_idx_rows = [time_ds[idx_row * self.time_idx_step]
                         for idx_row in xrange(old_idx_row, new_idx_row)]
        if time_idx_rows:
            ds_tidx = data_file[DS_TIMEIDX_PATH]
            tidx_cur_row = ds_tidx.attrs["cur_row"]
            if tidx_cur_row + num_tidx_rows > len(ds_tidx):
                self._resize_dataset(ds_tidx, num_tidx_rows, INTERNAL_ROW_INCREMENT)
            ds_tidx[tidx_cur_row:tidx_cur_row + num_tidx_rows] = time_idx_rows
            ds_tidx.attrs["cur_row"] += num_tidx_rows

        #HDF5Tools.dump_hdf5(data_file, with_data=True)
    finally:
        data_file.close()
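# Hedged sketch of the _resize_dataset() helper both extend_dataset() variants
# call; its real body is not shown in the source. With h5py, growing a chunked
# dataset created with an unlimited maxshape is a resize() call. The default
# increment below is an invented placeholder (the source passes
# INTERNAL_ROW_INCREMENT where a specific step is wanted).
def _resize_dataset(self, var_ds, num_rows, row_increment=1000):
    # Grow in coarse steps so repeated small appends do not resize every time.
    new_size = len(var_ds) + max(num_rows, row_increment)
    var_ds.resize((new_size,) + var_ds.shape[1:])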