async def _extract_data(conn: asyncpg.Connection, stream: DataStream, callback,
                        decimation_level: int = 1,
                        start: Optional[int] = None,
                        end: Optional[int] = None,
                        block_size: int = 50000):
    """Stream rows of *stream* out of the database in bounded blocks.

    Data is read interval-by-interval (boundaries come from the stream's
    ``data.stream<id>_intervals`` table) and passed to *callback* as numpy
    arrays; an interval token is sent between intervals so consumers can
    detect the gap.  If the backing tables do not exist, an empty array is
    sent once and the function returns.

    Args:
        conn: open asyncpg connection used for all queries.
        stream: source DataStream; its id selects the data/interval tables.
        callback: awaited as ``callback(data, layout, decimation_level)``.
        decimation_level: 1 reads raw data; >1 reads the ``_<level>``
            decimation table with the stream's decimated layout.
        start: optional lower time bound (passed to query_time_bounds).
        end: optional upper time bound (passed to query_time_bounds).
        block_size: max rows fetched per COPY query (paging size).
    """
    # decimated data uses a different layout (and table) than raw data
    if decimation_level > 1:
        layout = stream.decimated_layout
    else:
        layout = stream.layout
    table_name = "data.stream%d" % stream.id
    if decimation_level > 1:
        table_name += "_%d" % decimation_level
    # extract by interval
    query = "SELECT time FROM data.stream%d_intervals " % stream.id
    query += psql_helpers.query_time_bounds(start, end)
    try:
        boundary_records = await conn.fetch(query)
    except asyncpg.UndefinedTableError:
        # no data tables
        data = np.array([], dtype=pipes.compute_dtype(layout))
        await callback(data, layout, decimation_level)
        return
    # append the caller's end bound (may be None) so the final span after
    # the last boundary is extracted as well
    boundary_records += [{'time': end}]
    for i in range(len(boundary_records)):
        record = boundary_records[i]
        end = record['time']
        # extract the interval data, paging block_size rows at a time
        done = False
        while not done:
            query = "SELECT * FROM %s " % table_name
            query += psql_helpers.query_time_bounds(start, end)
            query += " ORDER BY time ASC LIMIT %d" % block_size
            psql_bytes = BytesIO()
            try:
                await conn.copy_from_query(query, format='binary',
                                           output=psql_bytes)
            except asyncpg.UndefinedTableError:
                # interval table exists but not the data table
                data = np.array([], dtype=pipes.compute_dtype(layout))
                await callback(data, layout, decimation_level)
                return
            psql_bytes.seek(0)
            dtype = pipes.compute_dtype(layout)
            np_data = psql_helpers.bytes_to_data(psql_bytes, dtype)
            await callback(np_data, layout, decimation_level)
            # a short block means this interval is exhausted
            if len(np_data) < block_size:
                break
            # resume just past the last row returned
            start = np_data['timestamp'][-1] + 1
        # do not put an interval token at the end of the data
        if i < len(boundary_records) - 1:
            await callback(pipes.interval_token(layout), layout,
                           decimation_level)
        # next interval starts at this boundary
        start = end
async def extract(self, stream: 'EventStream',
                  start: Optional[int] = None,
                  end: Optional[int] = None,
                  json_filter=None,
                  limit=None) -> List[Dict]:
    """Return events of *stream* between *start* and *end* as dicts.

    Results may be narrowed by *json_filter* (applied to the event content)
    and capped at *limit* rows.  When only *end* is given together with a
    *limit*, the newest *limit* events before *end* are selected.  The
    returned list is always sorted ascending by ``start_time``.

    Args:
        stream: EventStream whose events are selected.
        start: optional lower time bound.
        end: optional upper time bound (must be > start when both given).
        json_filter: optional content filter passed to query_event_json.
        limit: optional positive cap on the number of events returned.

    Raises:
        ValueError: if the time bounds are inverted or *limit* <= 0.
    """
    if end is not None and start is not None and end <= start:
        raise ValueError(
            "Invalid time bounds start [%d] must be < end [%d]" % (start, end))
    query = "SELECT id, time, end_time, content FROM data.events "
    where_clause = psql_helpers.query_time_bounds(start, end)
    if len(where_clause) == 0:
        where_clause = "WHERE "
    else:
        where_clause += " AND "
    where_clause += "event_stream_id=%d" % stream.id
    if json_filter is not None and len(json_filter) > 0:
        where_clause += " AND " + psql_helpers.query_event_json(json_filter)
    query += where_clause
    if limit is not None:
        # explicit validation instead of `assert` (asserts are stripped under
        # `python -O`); consistent with the ValueError raised for bad bounds
        if limit <= 0:
            raise ValueError("limit must be > 0")
        if start is None and end is not None:
            # only an upper bound: take the *newest* events below it
            query += " ORDER BY time DESC"
        else:
            query += " ORDER BY time ASC"
        query += f" LIMIT {limit}"
    else:
        query += " ORDER BY time ASC"
    async with self.pool.acquire() as conn:
        records = await conn.fetch(query)
    events = list(map(record_to_event, records))
    # DESC-limited queries come back newest-first; normalize the ordering
    events.sort(key=lambda e: e["start_time"])
    return events
async def remove(self, stream: 'DataStream', start: Optional[int],
                 end: Optional[int], exact: bool = True):
    """Remove data of *stream* between *start* and *end*.

    Deletes matching rows from every table backing the stream (raw,
    decimations, intervals).  When *exact* is False and no lower bound is
    given, TimescaleDB's much faster ``drop_chunks`` is used instead of
    ``DELETE`` — the result is approximate (whole chunks are dropped).
    Afterwards an interval boundary is written at *start* to mark the gap.

    Args:
        stream: DataStream whose data is removed.
        start: optional lower time bound; if given, an interval boundary
            is closed at this timestamp after removal.
        end: optional upper time bound.
        exact: if False, permit the approximate drop_chunks fast path.
    """
    where_clause = psql_helpers.query_time_bounds(start, end)
    async with self.pool.acquire() as conn:
        tables = await psql_helpers.get_table_names(conn, stream)
        for table in tables:
            # TODO: use drop chunks with newer and older clauses when timescale is updated
            # ******DROP CHUNKS IS *VERY* APPROXIMATE*********
            if start is None and "intervals" not in table and not exact:
                # use the much faster drop chunks utility and accept the approximate result
                bounds = await psql_helpers.convert_time_bounds(
                    conn, stream, start, end)
                if bounds is None:
                    return  # no data to remove
                query = "SELECT drop_chunks('%s', older_than=>'%s'::timestamp)" % (
                    table, bounds[1])
            else:
                # exact removal: plain DELETE with the time-bounds clause
                query = 'DELETE FROM %s ' % table + where_clause
            try:
                await conn.execute(query)
            except asyncpg.UndefinedTableError:
                return  # no data to remove
            except asyncpg.exceptions.RaiseError as err:
                # best-effort: report the database error and stop
                print("psql: ", err)
                return
        # create an interval boundary to mark the missing data
        if start is not None:
            await psql_helpers.close_interval(conn, stream, start)
async def remove(self, stream: 'EventStream',
                 start: Optional[int] = None,
                 end: Optional[int] = None,
                 json_filter=None):
    """Delete events of *stream* between *start* and *end*.

    An optional *json_filter* (passed to query_event_json) further narrows
    which events are deleted.  Returns the status of the DELETE command.
    """
    time_bounds = psql_helpers.query_time_bounds(start, end)
    # predicates appended after the (possibly empty) time-bounds clause
    predicates = ["event_stream_id=%d" % stream.id]
    if json_filter is not None and len(json_filter) > 0:
        predicates.append(psql_helpers.query_event_json(json_filter))
    if len(time_bounds) == 0:
        clause = "WHERE " + " AND ".join(predicates)
    else:
        clause = time_bounds + " AND " + " AND ".join(predicates)
    query = "DELETE FROM data.events " + clause
    async with self.pool.acquire() as conn:
        return await conn.execute(query)
async def count(self, stream: 'EventStream',
                start: Optional[int] = None,
                end: Optional[int] = None,
                json_filter=None) -> int:
    """Count events of *stream* between *start* and *end*.

    An optional *json_filter* (passed to query_event_json) further narrows
    which events are counted.

    Args:
        stream: EventStream whose events are counted.
        start: optional lower time bound.
        end: optional upper time bound (must be > start when both given).
        json_filter: optional content filter passed to query_event_json.

    Raises:
        ValueError: if both bounds are given and end <= start.
    """
    if end is not None and start is not None and end <= start:
        raise ValueError(
            "Invalid time bounds start [%d] must be < end [%d]" % (start, end))
    query = "SELECT count(*) FROM data.events "
    where_clause = psql_helpers.query_time_bounds(start, end)
    if len(where_clause) == 0:
        where_clause = "WHERE "
    else:
        where_clause += " AND "
    where_clause += "event_stream_id=%d" % stream.id
    if json_filter is not None and len(json_filter) > 0:
        where_clause += " AND " + psql_helpers.query_event_json(json_filter)
    query += where_clause
    async with self.pool.acquire() as conn:
        # fetchval returns the first column of the first row (the count)
        # directly, instead of materializing a Record list and indexing it
        return await conn.fetchval(query)