Example #1
    def read_nowait(self, flatten=False):
        """
        Same as read but this is not a coroutine. This should only be used for unit testing.

        Args:
            flatten: return an unstructured array (flat 2D matrix) with timestamps in the first column

        Returns:
            numpy.ndarray

        >>> data = pipe.read_nowait()
        [1, 2, 3]

        """
        if self._failed:
            raise PipeError('pipe failed')

        # if reread is set just return the old data
        if self._reread:
            self._reread = False
            if len(self.read_buffer) == 0:
                raise PipeError("No data left to reread")
            return self._format_data(self.read_buffer, flatten)

        # if the queue is empty and we have old data, just return the old data
        if self.queue.empty() and len(self.read_buffer) > 0:
            return self._format_data(self.read_buffer, flatten)

        # if the buffer is empty and the queue is empty and the pipe is closed
        if self.queue.empty() and len(self.read_buffer) == 0 and self.closed:
            raise EmptyPipe()
        # do not wait for new data, return an empty array if nothing else is available
        return self._read(flatten)
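Taken together with write_nowait (Example #24 below) and close_nowait (Example #8), this supports fully synchronous unit tests. A minimal sketch, assuming a LocalPipe-style class exposing the methods shown in these examples; the constructor signature and the "float32_2" layout string are assumptions:

    import numpy as np

    pipe = LocalPipe("float32_2", name="test")       # hypothetical constructor
    pipe.write_nowait(np.array([[1000, 0.5, 1.5],    # unstructured rows:
                                [1001, 0.6, 1.6]]))  # timestamp first
    data = pipe.read_nowait(flatten=True)            # flat 2D matrix back
    assert data.shape == (2, 3)
    pipe.consume(len(data))                          # mark every row processed
    pipe.close_nowait()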
Example #2
    async def close_interval(self):
        """
        Signal a break in the data stream. This should be used to indicate missing data.
        Data returned from :meth:`read` will be chunked by interval boundaries.

        """
        if self.direction == Pipe.DIRECTION.INPUT:
            raise PipeError("cannot write to an input pipe")
        raise PipeError("abstract method must be implemented by child")
Example #3
    async def flush_cache(self):
        """
        Force a pipe flush even if the cache is not full. Raises an error if caching is not
        enabled.

        """
        if self.direction == Pipe.DIRECTION.INPUT:
            raise PipeError("cannot control cache on input pipes")
        raise PipeError("abstract method must be implemented by child")
Example #4
 async def close_interval(self):
     if self._failed:
         raise PipeError('pipe failed')
     if self.closed:
         raise PipeError("Cannot write to a closed pipe")
     if self.debug:
         print("[%s:write] closing interval" % self.name)
     if self._caching:
         await self.flush_cache()
     await self.queue.put(None)
Example #5
    def reread_last(self):
        """
        The next read will return only unconsumed data from the previous read
        and no new data from the source. The end_of_interval flag is maintained.

        """
        if self.direction == Pipe.DIRECTION.OUTPUT:
            raise PipeError("cannot read from an output pipe")

        raise PipeError("Not Implemented")
Example #6
 def _validate_data(data):
     if type(data) is not np.ndarray:
         raise PipeError("invalid data type must be a structured array or 2D matrix")
     # check for valid data type
     try:
         if (len(data) == 0) or len(data[0]) == 0:
             log.info("pipe write called with no data")
             return False
     except TypeError:
         raise PipeError("invalid data type must be a structured array or 2D matrix")
     return True
Example #7
 def consume(self, num_rows):
     if num_rows == 0:
         return  # nothing to do
     if num_rows < 0:
         raise PipeError("consume called with negative offset: %d" %
                         num_rows)
     if num_rows > self.last_index:
         raise PipeError("cannot consume %d rows: only %d available" %
                         (num_rows, self.last_index))
     # shift the unconsumed rows to the front of the fixed-size buffer
     self.buffer = np.roll(self.buffer, -1 * num_rows)
     self.last_index -= num_rows
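The np.roll call moves the surviving rows to the front of the buffer; only buffer[:last_index] remains valid afterwards. A standalone numpy illustration of the same trick:

    import numpy as np

    buffer = np.array([10, 20, 30, 40, 50])
    buffer = np.roll(buffer, -2)      # consume the first two rows
    print(buffer)                     # [30 40 50 10 20]
    last_index = 5 - 2                # buffer[3:] is stale and ignored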
Example #8
    def close_nowait(self):
        """
        Same as close but this is not a coroutine. This should only be used for
        unit testing.

        """
        if len(self.subscribers) > 0:
            raise PipeError("cannot close_nowait a pipe with subscribers, use async close")
        if self.close_cb is not None:
            raise PipeError("close_cb cannot run synchronously, use async close")
        self.closed = True
Example #9
 def consume(self, num_rows):
     if num_rows == 0:
         return
     if num_rows < 0:
         raise PipeError("consume called with negative offset: %d" %
                         num_rows)
     if num_rows > len(self.read_buffer):
         raise PipeError("cannot consume %d rows: only %d available" %
                         (num_rows, len(self.read_buffer)))
     if self.debug:
         print("[%s:read] consumed %d rows" % (self.name, num_rows))
     self.read_buffer = self.read_buffer[num_rows:]
Example #10
    async def read_all(self, flatten=False, maxrows=1e5, error_on_overflow=False) -> np.ndarray:
        """
                Read stream data. By default this method returns a structured
                array with ``timestamp`` and ``data`` fields. The pipe is automatically closed.
                This method is a coroutine.

                Args:
                    flatten: return an unstructured array (flat 2D matrix) with timestamps in the first column
                    maxrows: the maximum number of rows to read from the pipe
                    error_on_overflow: raise a PipeError exception if pipe is not empty after reading maxrows

                Returns:
                    numpy.ndarray

                >>> data = await pipe.read_all(flatten=True)
                [1, 2, 3]
        """
        if self.direction == Pipe.DIRECTION.OUTPUT:
            raise PipeError("cannot read from an output pipe")

        maxrows = int(maxrows)  # slicing requires an int; the default 1e5 is a float
        data = None
        while True:
            try:
                new_data = await self.read(flatten)
                self.consume(len(new_data))
            except PipeError:
                break
            if data is None:
                data = new_data
                if len(data) > maxrows:
                    await self.close()
                    if error_on_overflow:
                        raise PipeError("More than [%d] rows, increase maxrows or disable error_on_overflow" % maxrows)
                    return data[:maxrows]
            else:
                if len(data) + len(new_data) > maxrows:
                    await self.close()
                    if error_on_overflow:
                        raise PipeError("More than [%d] rows, increase maxrows or disable error_on_overflow" % maxrows)
                    remaining_rows = maxrows - len(data)
                    if flatten:
                        data = np.vstack((data, new_data[:remaining_rows]))
                    else:
                        data = np.hstack((data, new_data[:remaining_rows]))
                    break
                if flatten:
                    data = np.vstack((data, new_data))
                else:
                    data = np.hstack((data, new_data))
        if data is None:
            raise PipeError("No data in pipe")
        return data
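A usage sketch for read_all; the pipe object itself is assumed to come from elsewhere:

    import asyncio

    async def main(pipe):
        # drain the pipe (up to 100 rows) as a flat matrix; read_all closes it
        data = await pipe.read_all(flatten=True, maxrows=100)
        print(data[:, 0])             # the first column holds the timestamps

    # asyncio.run(main(pipe))        # with a pipe obtained elsewhere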
Example #11
    def consume(self, num_rows):
        """
        Flush data from the read buffer. The next call to :meth:`read` will
        return any unflushed data followed by new incoming data.

        Args:
            num_rows: number of rows to flush from the read buffer

        """

        if self.direction == Pipe.DIRECTION.OUTPUT:
            raise PipeError("cannot consume from an output pipe")
        raise PipeError("abstract method must be implemented by child")
Example #12
    def enable_cache(self, lines: int):
        """
        Turn on caching for pipe writes. Data is only transmitted once the cache is full.
        This improves system performance especially if :meth:`write` is called
        rapidly with short arrays. Once enabled, caching cannot be disabled.

        Args:
            lines: cache size

        """
        if self.direction == Pipe.DIRECTION.INPUT:
            raise PipeError("cannot control cache on input pipes")
        raise PipeError("abstract method must be implemented by child")
Example #13
    async def write(self, data):
        """
        Write timestamped data to the pipe. Timestamps must be monotonically increasing
        and should not overlap with existing stream data in the database. This method is a coroutine.

        Args:
            data (numpy.ndarray): May be a structured array with ``timestamp`` and ``data`` fields
                or an unstructured array with timestamps in the first column.

        >>> await pipe.write([[1000, 2, 3],[1001, 3, 4]])

        """
        if self.direction == Pipe.DIRECTION.INPUT:
            raise PipeError("cannot write to an input pipe")
        raise PipeError("abstract method must be implemented by child")
Example #14
    async def read(self, flatten=False):
        if flatten:
            raise NotImplementedError("flatten is not supported by this pipe")
        if self._reread:
            self._reread = False
            if self.last_block is None or len(self.last_block) == 0:
                raise PipeError("No data left to reread")
            return self.last_block

        if len(self.data_blocks) == 0 and self.last_block is None:
            raise EmptyPipeError()
        if len(self.data_blocks) != 0:
            block = self.data_blocks.popleft()
            if len(self.data_blocks) == 0:
                self.interval_break = True
                self._last_read = True
            elif self.data_blocks[0] is None:
                self.data_blocks.popleft()
                self.interval_break = True
            else:
                self.interval_break = False

            if self.last_block is not None:
                self.last_block = np.hstack((self.last_block, block))
            else:
                self.last_block = block
        return self.last_block
Example #15
 async def flush_cache(self):
     if self.closed:
         raise PipeError("Cannot write to a closed pipe")
     if self._cache_index > 0:
         await self._write(self._cache[:self._cache_index])
         self._cache_index = 0
         self._cache = np.empty(len(self._cache), self.dtype)
Example #16
 async def close_interval(self):
     if self.closed:
         raise PipeError("Pipe is closed")
     if self.writer is None:
         return  # nothing has been written yet so nothing to close
     if self._caching:
         await self.flush_cache()
     self.writer.write(interval_token(self.layout).tobytes())
     await self.writer.drain()
Example #17
    async def read(self, flatten=False) -> np.ndarray:

        if self._failed:
            await self.close()
            raise PipeError('pipe failed')

        # if reread is set just return the old data
        if self._reread:
            self._reread = False
            if len(self.read_buffer) == 0:
                raise PipeError("No data left to reread")
            return self._format_data(self.read_buffer, flatten)

        self._interval_break = False

        # if the queue is empty and we have old data, just return the old data
        # THIS IS REMOVED, OTHERWISE THE WRITER CAN BE STARVED AND NEVER CLOSE THE PIPE
        # if self.queue.empty() and len(self.read_buffer) > 0:
        #    await asyncio.sleep(self.TIMEOUT_INTERVAL)
        #    return self._format_data(self.read_buffer, flatten)

        # otherwise wait for at least one block
        while self.queue.empty():
            # if self._last_read:
            #    raise EmptyPipe()  # trying to re-read old data
            # if the buffer is empty and the queue is empty and the pipe is closed
            if self.queue.empty() and self.closed:
                self._last_read = True  # from now on is_empty() will return True,
                # but an error is only raised once all the remaining data is consumed
                break  # return unconsumed data

            await asyncio.sleep(self.TIMEOUT_INTERVAL)
        data_block = self._read(flatten)

        # NOTE: read may return an empty array if the producer closes the current
        # interval after all of its data has already been consumed. This typically
        # happens when a module fails and is restarted: the inserter pipe has no
        # unread data, but the terminating worker appends an interval-closing
        # block [None] to the pipe. If the producer has also closed the pipe there
        # is no reason to return empty data, so raise EmptyPipe instead.

        if len(data_block) == 0 and self.closed:
            raise EmptyPipe()
        return data_block
Example #18
    async def write(self, data: np.ndarray):
        if self._failed:
            await self.close()
            raise PipeError('pipe failed')
        if self.closed:
            raise PipeError("Cannot write to a closed pipe")
        if not self._validate_data(data):
            return
        # convert into a structured array
        sarray = self._apply_dtype(data)

        if self._caching:
            for row in sarray:
                self._cache[self._cache_index] = row
                self._cache_index += 1
                if self._cache_index >= len(self._cache):
                    await self.flush_cache()
        else:
            await self._write(sarray)
Example #19
 def close_interval_nowait(self):
     if self.closed:
         raise PipeError("Pipe is closed")
     if self.writer is None:
         return  # nothing has been written yet so nothing to close
     if self._cache_index > 0:
         log.warning("dumping %d rows of cached data due on %s" %
                     (self._cache_index, self.name))
         self._cache = np.empty(len(self._cache), self.dtype)
         self._cache_index = 0
     self.writer.write(interval_token(self.layout).tobytes())
Example #20
    def close_interval_nowait(self):
        """
        Same as close_interval but this is not a coroutine. This should only be used
        for unit testing.

        """
        if self._failed:
            raise PipeError('pipe failed')
        if self.debug:
            print("[%s:write] closing interval" % self.name)
        self.queue.put_nowait(None)
Example #21
    def subscribe(self, pipe):
        if self.direction == Pipe.DIRECTION.INPUT:
            raise PipeError("cannot subscribe to an input pipe")

        self.subscribers.append(pipe)

        def unsubscribe():
            i = self.subscribers.index(pipe)
            del self.subscribers[i]

        return unsubscribe
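A fan-out sketch: every block written to the source pipe is mirrored to the subscriber until the returned callback is invoked. The pipe and block names here are hypothetical; the forwarding of write_nowait calls to subscribers is shown in Example #24.

    unsubscribe = source_pipe.subscribe(tap_pipe)   # hypothetical pipes
    source_pipe.write_nowait(block)                 # tap_pipe receives a copy
    unsubscribe()                                   # stop mirroring (call once)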
Example #22
 def _apply_dtype(self, data: np.ndarray) -> np.ndarray:
     """convert [data] to the pipe's [dtype]"""
     if data.ndim == 1:
         # already a structured array just verify its data type
         if data.dtype != self.dtype:
             raise PipeError("wrong dtype for 1D (structured) array" +
                             "[%s] != req type [%s]" % (data.dtype,
                                                        self.dtype))
         return data
     elif data.ndim == 2:
         # Convert to structured array
         sarray = np.zeros(data.shape[0], dtype=self.dtype)
         try:
             sarray['timestamp'] = data[:, 0]
             # the squeeze is needed in case sarray['data'] is 1 dimensional
             sarray['data'] = np.squeeze(data[:, 1:])
             return sarray
         except (IndexError, ValueError):
             raise PipeError("wrong number of fields for this data type")
     else:
         raise PipeError("wrong number of dimensions in array")
Example #23
    async def read(self, flatten=False) -> np.ndarray:
        """
        Read stream data. By default this method returns a structured
        array with ``timestamp`` and ``data`` fields. This method is a coroutine.

        Args:
            flatten: return an unstructured array (flat 2D matrix) with timestamps in the first column

        Returns:
            numpy.ndarray

        >>> data = await pipe.read()
        [1, 2, 3]
        >>> data = await pipe.read(flatten=True)
        # the same data is returned again
        [1, 2, 3]
        >>> pipe.consume(len(data))
        # next call to read will return only new data
        """
        if self.direction == Pipe.DIRECTION.OUTPUT:
            raise PipeError("cannot read from an output pipe")

        raise PipeError("abstract method must be implemented by child")
Example #24
    def write_nowait(self, data):
        if self._failed:
            raise PipeError('pipe failed')
        if self.closed:
            raise PipeError("Cannot write to a closed pipe")
        if not self._validate_data(data):
            return

        # convert into a structured array
        sarray = self._apply_dtype(data)
        # send data to subscribers
        for pipe in self.subscribers:
            if type(pipe) is LocalPipe:
                p: LocalPipe = pipe  # to appease type checker
                p.write_nowait(sarray)
            else:
                raise PipeError("cannot write_nowait to subscriber [%s]" %
                                pipe.name)

        self.queue.put_nowait(sarray)
        self.queued_rows += len(sarray)
        if self.debug:
            print("[%s:write] queueing block with [%d] rows" %
                  (self.name, len(sarray)))
Example #25
    async def write(self, data):
        if self.closed:
            raise PipeError("Cannot write to a closed pipe")
        if not self._validate_data(data):
            return
        # make sure dtype is structured
        sdata = self._apply_dtype(data)

        if self._caching:
            for row in sdata:
                self._cache[self._cache_index] = row
                self._cache_index += 1
                if self._cache_index >= len(self._cache):
                    await self.flush_cache()
        else:
            await self._write(sdata)
Example #26
 def reread_last(self):
     if len(self.read_buffer) == 0:
         raise PipeError("No data left to reread")
     self._reread = True
Example #27
 def write_nowait(self, data):
     if self._closed:
         raise PipeError("Cannot write to a closed pipe")
     self.data_blocks.append(data)
Example #28
 def change_layout(self, layout: str):
     raise PipeError("layout cannot be changed")
Example #29
    async def read(self, flatten=False) -> np.ndarray:
        if self.reader is None:
            self.reader, self._reader_close = await self.reader_factory()
        if self.closed:
            # this happens if close is called before the first read
            if self._reader_close is not None:
                self._reader_close()
            raise PipeError("Cannot read from a closed pipe")
        rowbytes = self.dtype.itemsize
        # rows of buffer space left after resident rows and pending unprocessed rows
        max_rows = self.BUFFER_SIZE - (self.last_index +
                                       len(self.unprocessed_np_buffer) // rowbytes)

        if max_rows == 0:
            # buffer is full, this must be consumed before a new read
            return self._format_data(self.buffer[:self.last_index], flatten)

        # if reread is set just return the old data
        if self._reread:
            self._reread = False
            if self.last_index == 0:
                raise PipeError("No data left to reread")
            return self._format_data(self.buffer[:self.last_index], flatten)

        # make sure we get at least one full row of data from read (depends on datatype)
        raw = b''
        while True:
            new_data = b''
            if self.reader.at_eof():
                # do not raise an exception, but is_empty() will return True
                #if self._last_read:
                #    raise EmptyPipe()  # this data has already been read once
                if (len(self.unprocessed_np_buffer) == 0
                        and self.last_index == 0):
                    raise EmptyPipe()
                if len(self.unprocessed_np_buffer) == 0:
                    # no new data is coming in, read() will just return
                    # previously viewed data
                    self._last_read = True
                break
            try:
                new_data = await asyncio.wait_for(
                    self.reader.read(max_rows * rowbytes),
                    self.TIMEOUT_INTERVAL)
            except asyncio.TimeoutError:
                pass
            raw += new_data
            if len(raw) < self.dtype.itemsize:
                await asyncio.sleep(0.1)
            else:
                break
        # extra_bytes: number of leftover bytes after % rowbytes
        # byte_buffer: the extra_bytes from the last read
        # unprocessed_np_buffer: data leftover from an interval break in the previous read
        extra_bytes = (len(raw) + len(self.byte_buffer)) % rowbytes

        if extra_bytes > 0:
            np_buffer = self.byte_buffer + raw[:-extra_bytes]
            self.byte_buffer = raw[-extra_bytes:]
        elif len(self.byte_buffer) > 0:
            np_buffer = self.byte_buffer + raw
            self.byte_buffer = b''
        else:  # common case where byte_buffer is empty and no extra bytes
            np_buffer = raw
            self.byte_buffer = b''

        # append unprocessed np_buffer from previous read
        if len(self.unprocessed_np_buffer) > 0:
            self.unprocessed_np_buffer = self.unprocessed_np_buffer + np_buffer
            # check if we can process all the data, if not
            # store the extra in unprocessed_np_buffer
            max_bytes = max_rows * rowbytes
            if len(self.unprocessed_np_buffer) <= max_bytes:
                np_buffer = self.unprocessed_np_buffer
                self.unprocessed_np_buffer = b''
            else:
                np_buffer = self.unprocessed_np_buffer[:max_bytes]
                self.unprocessed_np_buffer = self.unprocessed_np_buffer[
                    max_bytes:]

        # check for an interval
        self.interval_break = False
        loc = find_interval_token(np_buffer, self.layout)
        if loc is not None:
            self.unprocessed_np_buffer = np_buffer[
                loc[1]:] + self.unprocessed_np_buffer
            np_buffer = np_buffer[:loc[0]]
            self.interval_break = True
        data = np.frombuffer(np_buffer, dtype=self.dtype)

        # append data onto buffer
        self.buffer[self.last_index:self.last_index + len(data)] = data

        self.last_index += len(data)
        return self._format_data(self.buffer[:self.last_index], flatten)
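The byte-framing arithmetic above (rowbytes, extra_bytes, byte_buffer) can be checked in isolation: partial rows at the end of a read are held back until the next read completes them. A standalone illustration with an assumed 16-byte row dtype:

    import numpy as np

    dtype = np.dtype([('timestamp', '<i8'), ('data', '<f4', (2,))])
    rowbytes = dtype.itemsize             # 16 bytes per row
    raw = bytes(3 * rowbytes + 5)         # three full rows + 5 stray bytes
    extra_bytes = len(raw) % rowbytes     # 5
    np_buffer, byte_buffer = raw[:-extra_bytes], raw[-extra_bytes:]
    rows = np.frombuffer(np_buffer, dtype=dtype)
    print(len(rows), len(byte_buffer))    # 3 5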
Example #30
 def reread_last(self):
     if self.last_index == 0:
         raise PipeError("No data left to reread")
     self._reread = True