Пример #1
0
    def __init__(self,
                 filebase=None,
                 tail=False,
                 refresh_file_spec=False,
                 retry_interval=0.1,
                 interval=0,
                 use_timestamps=False,
                 date_format=timestamp.DATE_FORMAT):
        """
    Set up a reader that delegates the actual file reading to a
    TextFileReader built from filebase.

    filebase     Prefix of the files to be opened; a '*' wildcard is
                 appended to it to form the file spec. Special case: if
                 filebase is None (or otherwise falsy), a file spec of
                 None is handed to TextFileReader, which is meant to make
                 it read from stdin.

    tail         If False, return None upon reaching end of last file; if
                 True, block upon reaching EOF of last file and wait for
                 more records.

    refresh_file_spec
                 If True, refresh the search for matching filenames when
                 reaching last EOF to see if any new matching files have
                 appeared in the interim.

    retry_interval
                 If tail and/or refresh_file_spec are True, how long to
                 wait before looking to see if any new records or files
                 have shown up.

    interval
                 How long to sleep between returning records. In general
                 this should be zero except for debugging purposes. May
                 not be combined with use_timestamps.

    use_timestamps
                 Stored on the instance; may not be combined with a
                 non-zero interval (ValueError is raised if both are set).

    date_format
                 Stored on the instance; defaults to timestamp.DATE_FORMAT.

    Note that the order in which files are opened will probably be in
    alphanumeric by filename, but this is not strictly enforced and
    depends on how glob returns them.
    """
        super().__init__(output_format=Text)

        # A fixed sleep interval and timestamp-driven pacing are mutually
        # exclusive options.
        if use_timestamps and interval:
            raise ValueError(
                'Can not specify both "interval" and "use_timestamps"')

        self.filebase = filebase
        self.use_timestamps = use_timestamps
        self.date_format = date_format

        # Bookkeeping used when use_timestamps is set, to work out how long
        # to sleep between records.
        self.last_timestamp = 0
        self.last_read = 0

        # A filebase gets a trailing wildcard so that all of its suffixed
        # files match; without one, None is passed on so that the
        # TextFileReader falls back to stdin. NOTE: we should really use a
        # pattern that echoes timestamp.DATE_FORMAT, e.g.
        # DATE_FORMAT_WILDCARD = '????-??-??'
        if filebase:
            self.file_spec = filebase + '*'
        else:
            self.file_spec = None

        reader_kwargs = {
            'file_spec': self.file_spec,
            'tail': tail,
            'refresh_file_spec': refresh_file_spec,
            'retry_interval': retry_interval,
            'interval': interval,
        }
        self.reader = TextFileReader(**reader_kwargs)
Пример #2
0
    def test_basic(self):
        """Switch a LoggerRunner between modes and check what it writes."""
        runner = LoggerRunner(self.config, interval=0.1)

        # run() executes on a background thread so the test can drive the
        # runner from here.
        runner_thread = threading.Thread(target=runner.run)
        runner_thread.start()

        time.sleep(0.2)

        # Nothing should have been written yet, and the runner should
        # still be in its default mode.
        dest_exists = os.path.exists(self.dest_name)
        self.assertFalse(dest_exists)
        self.assertEqual(runner.mode, "off")

        # Asking for an unknown mode should log at WARNING or above and
        # leave the current mode unchanged.
        root_logger = logging.getLogger()
        with self.assertLogs(root_logger, logging.WARNING):
            runner.set_mode('nonexistent mode')
        self.assertEqual(runner.mode, "off")

        runner.set_mode('on')
        self.assertEqual(runner.mode, "on")

        time.sleep(0.6)

        # The destination file should now hold the sample data, in order.
        reader = TextFileReader(self.dest_name)
        for expected_line in SAMPLE_DATA:
            logging.info('Checking line: "%s"', expected_line)
            self.assertEqual(expected_line, reader.read())

        logger_process = runner.processes['logger']
        self.assertTrue(logger_process.is_alive())
        # NOTE(review): pid is captured but not used in the visible part of
        # this test.
        pid = logger_process.pid

        # Ask the runner to shut down and give it a moment to do so.
        runner.quit()
        time.sleep(0.2)
Пример #3
0
    def test_basic(self):
        """Run a configured LoggerRunner, then one with an empty config."""
        # The destination file must not exist before the runner starts.
        self.assertFalse(os.path.exists(self.dest_name))

        live_runner = LoggerRunner(config=self.config)
        live_runner.start()
        time.sleep(1.0)

        # The runner should have copied the sample data, in order.
        reader = TextFileReader(self.dest_name)
        for expected in SAMPLE_DATA:
            actual = reader.read()
            logging.info('Checking line: "%s"', expected)
            logging.info('Against line:  "%s"', actual)
            self.assertEqual(expected, actual)

        self.assertTrue(live_runner.is_runnable())
        self.assertTrue(live_runner.is_alive())
        self.assertFalse(live_runner.is_failed())

        live_runner.quit()
        self.assertFalse(live_runner.is_alive())

        # Degenerate case: with an empty config the runner should be
        # neither runnable, alive, nor failed.
        empty_runner = LoggerRunner(config={})
        empty_runner.start()
        time.sleep(1.0)

        status_checks = (empty_runner.is_runnable,
                         empty_runner.is_alive,
                         empty_runner.is_failed)
        for check in status_checks:
            self.assertFalse(check())
Пример #4
0
    def test_check_format(self):
        """Listener with check_format=True should warn and raise ValueError."""
        root_logger = logging.getLogger()

        # Both expectations must hold for the same construction call: a log
        # message at WARNING or above, and a ValueError. The readers are
        # built inside the contexts, as anything they log counts too.
        with self.assertLogs(root_logger, logging.WARNING), \
                self.assertRaises(ValueError):
            readers = [TextFileReader(self.tmpfilenames[0]),
                       TextFileReader(self.tmpfilenames[1])]
            Listener(readers, check_format=True)
Пример #5
0
    def test_all_files(self):
        """A wildcard spec should yield every file's lines, then None."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            # Write out each sample file, remembering the lines in the
            # (sorted-by-name) order the files are created.
            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                file_lines = SAMPLE_DATA[name]
                create_file('/'.join((tmpdirname, name)), file_lines)
                expected_lines.extend(file_lines)

            reader = TextFileReader(tmpdirname + '/f*')
            for expected in expected_lines:
                actual = reader.read()
                self.assertEqual(expected, actual)

            # Once everything has been consumed the reader reports None.
            self.assertEqual(None, reader.read())
Пример #6
0
  def test_basic(self):
    """Start and stop a logger via set_configs() and check its status."""
    # The destination file must not exist before any logger has run.
    self.assertFalse(os.path.exists(self.dest_name))

    runner = LoggerRunner(interval=0.1)
    runner_thread = threading.Thread(target=runner.run, daemon=True)
    runner_thread.start()

    modes = self.config['modes']
    runner.set_configs(modes['on'])
    time.sleep(1.0)

    # The logger should have copied the sample data, in order.
    reader = TextFileReader(self.dest_name)
    for expected in SAMPLE_DATA:
      actual = reader.read()
      logging.info('Checking line: "%s"', expected)
      logging.info('Against line:  "%s"', actual)
      self.assertEqual(expected, actual)

    self.assertTrue(runner.logger_is_alive('logger'))
    pid = runner.processes['logger'].pid

    # While running, the logger should report its pid and no errors.
    status = runner.check_loggers()
    running_status = {
      'config': 'unknown',
      'errors': [],
      'running': True,
      'pid': pid,
      'failed': False,
    }
    self.assertDictEqual(status, {'logger': running_status})

    # After switching to the 'off' configs the logger should be reported
    # as having no config, pid or running state.
    runner.set_configs(modes['off'])
    time.sleep(0.1)
    stopped_status = {
      'config': None,
      'errors': [],
      'running': None,
      'pid': None,
      'failed': False,
    }
    self.assertDictEqual(runner.check_loggers(), {'logger': stopped_status})

    # NOTE: a check that the logger process itself is gone (os.kill(pid, 0)
    # raising ProcessLookupError) is currently disabled.

    # Shut the runner down and make sure its thread actually exits.
    runner.quit()
    runner_thread.join(2.0)
    self.assertFalse(runner_thread.is_alive())
Пример #7
0
    def test_read_range_after_read(self):
        """read_range() should be unaffected by earlier read() calls."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                path = tmpdirname + '/' + name
                create_file(path, SAMPLE_DATA[name])
                expected_lines += SAMPLE_DATA[name]

            reader = TextFileReader(tmpdirname + '/f*')

            # Advance the reader a few records before asking for a range.
            for _ in range(5):
                reader.read()

            wanted = expected_lines[1:4]
            self.assertEqual(wanted, reader.read_range(1, 4))
    def test_check_format(self):
        """ComposedReader format checking: accept matching, reject mixed."""
        first_file, second_file = self.tmpfilenames[0], self.tmpfilenames[1]

        # Two TextFileReaders share a format, so this must not raise.
        matching_readers = [TextFileReader(first_file),
                            TextFileReader(second_file)]
        ComposedReader(matching_readers, check_format=True)

        # A TextFileReader and a bare Reader have no common format, so
        # construction is expected to raise ValueError.
        with self.assertRaises(ValueError):
            mixed_readers = [TextFileReader(first_file), Reader()]
            ComposedReader(mixed_readers, check_format=True)
Пример #9
0
    def test_empty_lines_okay(self):
        """TimeoutReader with empty_is_okay=True over SAMPLE_SPACE_DATA.

        The wrapped TextFileReader tails the file, returning a record every
        0.5 seconds. The TimeoutReader (timeout=1) is expected to return
        its 'Space Timeout' message roughly 3.5 seconds after read() is
        called.
        """
        # Use the context-manager form so the directory is removed
        # deterministically rather than by the object's finalizer (which
        # emits a ResourceWarning).
        with tempfile.TemporaryDirectory() as temp_dir_name:
            test_file = temp_dir_name + '/test.txt'
            logging.info('creating temporary file "%s"', test_file)
            create_file(test_file, SAMPLE_SPACE_DATA)

            # Create a reader to read it at 0.5 second intervals
            reader = TextFileReader(test_file, interval=0.5, tail=True)
            timeout_reader = TimeoutReader(reader,
                                           timeout=1,
                                           empty_is_okay=True,
                                           message='Space Timeout')

            # Our reader should do fine until it gets to the very end; the
            # timeout message is expected about 3.5 seconds in. Use the
            # monotonic clock so a system clock adjustment can't skew the
            # measured duration.
            start_time = time.monotonic()
            record = timeout_reader.read()
            elapsed = time.monotonic() - start_time

            self.assertEqual(record, 'Space Timeout')
            self.assertAlmostEqual(elapsed, 3.5, delta=0.3)

            logging.info('Got timeout record "%s" after %g seconds', record,
                         elapsed)
Пример #10
0
    def test_read_all_write_one(self):
        """Several readers feeding one writer, via two prefix transforms."""
        readers = [TextFileReader(name, interval=0.2)
                   for name in self.tmpfilenames]
        transforms = [PrefixTransform('prefix_1'), PrefixTransform('prefix_2')]

        outfilename = self.tmpdirname + '/f_out'
        writers = [TextFileWriter(outfilename)]

        listener = Listener(readers, transforms, writers)
        listener.run()

        # Records from different readers may interleave, so compare the
        # written and expected lines as sorted lists.
        with open(outfilename, 'r') as outfile:
            out_lines = sorted(line.rstrip() for line in outfile)

        # Transforms are applied in order, so the second prefix ends up
        # outermost.
        source_lines = sorted('prefix_2 prefix_1 ' + line
                              for name in SAMPLE_DATA
                              for line in SAMPLE_DATA[name])

        logging.debug('out: %s, source: %s', out_lines, source_lines)
        self.assertEqual(out_lines, source_lines)
Пример #11
0
    def _stderr_file_to_cds(self, logger, stderr_file_name):
        """Iteratively read from a file (presumed to be a logger's stderr
        file) and send the lines to a cached data server labeled as coming
        from stderr:logger:<logger>.

        Each line read is expected to have the form

          <date> <time> <levelno> <levelname> <filename>.py:<lineno> <message>

        and is forwarded as a DASRecord whose single field is a
        JSON-encoded dict of asctime, levelno, levelname, filename, lineno
        and message. Lines that don't match that form are reported with a
        warning and skipped.

        logger            Name of the logger, used to build the field name.

        stderr_file_name  File to tail; the caller is expected to have
                          checked that it exists.

        Loops until self.quit_flag is set. To be run in a separate thread
        from _check_logger_stderr_loop
        """

        if not self.data_server_writer:
            logging.error(
                'INTERNAL ERROR: called _stderr_file_to_cds(), but no '
                'cached data server defined?!?')
            return

        field_name = 'stderr:logger:' + logger
        message_format = ('{ascdate:S} {asctime:S} {levelno:d} {levelname:w} '
                          '{filename:w}.py:{lineno:d} {message}')

        # Our caller checked that this file exists, so open with impunity.
        reader = TextFileReader(file_spec=stderr_file_name, tail=True)
        while not self.quit_flag:
            record = reader.read()
            try:
                parsed_fields = parse.parse(message_format, record)

                # parse.parse() returns None (rather than raising) when the
                # line doesn't match the format. Subscripting None would
                # raise a TypeError that the handler below doesn't catch
                # and that would end this thread, so handle it explicitly.
                if parsed_fields is None:
                    logging.warning('Couldn\'t parse stderr message: %s',
                                    record)
                    continue

                fields = {
                    # Date and time are re-joined into one ISO-style stamp.
                    'asctime': (parsed_fields['ascdate'] + 'T' +
                                parsed_fields['asctime']),
                    'levelno': parsed_fields['levelno'],
                    'levelname': parsed_fields['levelname'],
                    # The format consumes the '.py' suffix; put it back.
                    'filename': parsed_fields['filename'] + '.py',
                    'lineno': parsed_fields['lineno'],
                    'message': parsed_fields['message']
                }
                das_record = DASRecord(fields={field_name: json.dumps(fields)})
                self.data_server_writer.write(das_record)
            except KeyError:
                logging.warning('Couldn\'t parse stderr message: %s', record)
Пример #12
0
    def test_tail_false(self):
        """Without tail, reading a file that has no data yet returns None."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            # Create a file slowly, one line at a time
            target = 'f1'
            tmpfilename = tmpdirname + '/' + target
            writer_thread = threading.Thread(target=create_file,
                                             args=(tmpfilename,
                                                   SAMPLE_DATA[target],
                                                   0.25))
            writer_thread.start()
            try:
                time.sleep(0.05)  # let the thread get started

                # With tail=False the reader should not wait for lines to
                # arrive; it should report None straight away.
                reader = TextFileReader(tmpfilename, tail=False)
                self.assertEqual(None, reader.read())
            finally:
                # Wait for the writer to finish before the temporary
                # directory is removed, so the thread isn't left writing
                # into a directory that is being deleted.
                writer_thread.join()
Пример #13
0
    def run_commands(logger_manager):
      """Drive logger_manager through its command-line API, check the
      output file, then tell the manager to quit."""
      api = logger_manager.api
      time.sleep(1)

      command_line = ServerAPICommandLine(api)

      # Load the cruise definition and switch to its 'port' mode.
      load_command = 'load_configuration %s' % self.cruise_filename
      command_line.process_command(load_command)
      command_line.process_command('set_active_mode port')

      # Give it time to run
      time.sleep(2)

      # The first four sample records should have been written out with
      # the 'TestLoggerManager ' prefix.
      reader = TextFileReader(self.output_filename)
      for index in range(4):
        actual = reader.read()
        expected = 'TestLoggerManager ' + sample_data[index]
        self.assertEqual(actual, expected)

      logging.info('TextFileReader done')
      logger_manager.quit()
Пример #14
0
    def test_tail_true(self):
        """With tail=True the reader should wait for, and return, every
        line that is eventually written to the file."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            # A background thread writes the file slowly, one line at a
            # time.
            target = 'f1'
            lines = SAMPLE_DATA[target]
            tmpfilename = '/'.join((tmpdirname, target))
            writer_thread = threading.Thread(
                target=create_file, args=(tmpfilename, lines, 0.25))
            writer_thread.start()

            time.sleep(0.05)  # let the thread get started

            # Each read should block until the next line shows up.
            reader = TextFileReader(tmpfilename, tail=True)
            for expected in SAMPLE_DATA[target]:
                self.assertEqual(expected, reader.read())
Пример #15
0
    def test_formats(self):
        """output_format() acts as both getter and setter for the format."""
        reader = TextFileReader(file_spec=None)

        # A fresh reader reports Text.
        initial_format = reader.output_format()
        self.assertEqual(initial_format, formats.Text)

        # Passing a format sets it, returns it, and makes it stick.
        returned_format = reader.output_format(formats.NMEA)
        self.assertEqual(returned_format, formats.NMEA)
        current_format = reader.output_format()
        self.assertEqual(current_format, formats.NMEA)

        # Anything that isn't a format is rejected.
        with self.assertRaises(TypeError):
            reader.output_format('not a format')
Пример #16
0
    def test_seek_current_negative_offset(self):
        """Seeking backwards relative to the current position."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                contents = SAMPLE_DATA[name]
                create_file(tmpdirname + '/' + name, contents)
                expected_lines.extend(contents)

            reader = TextFileReader(tmpdirname + '/f*')

            # Consume five records, then step back two.
            for _ in range(5):
                reader.read()

            position = reader.seek(-2, 'current')
            self.assertEqual(3, position)
            self.assertEqual(expected_lines[3], reader.read())

            # Now try a bigger offset, so we have to go back a couple files.
            reader.seek(8, 'start')
            position = reader.seek(-6, 'current')
            self.assertEqual(2, position)
            self.assertEqual(expected_lines[2], reader.read())
Пример #17
0
    def test_refresh_file_spec(self):
        """With refresh_file_spec=True the reader should pick up a file
        that only appears after the reader has been created."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            # The writer thread delays even creating the file, then writes
            # it slowly, one line at a time, so that when our
            # TextFileReader starts its file_spec matches nothing.
            target = 'f1'
            tmpfilename = '/'.join((tmpdirname, target))
            writer_args = (tmpfilename, SAMPLE_DATA[target], 0.25, 0.5)
            writer_thread = threading.Thread(target=create_file,
                                             args=writer_args)
            writer_thread.start()

            time.sleep(0.05)  # let the thread get started

            # Creating the reader is expected to log at WARNING or above.
            root_logger = logging.getLogger()
            with self.assertLogs(root_logger, logging.WARNING):
                reader = TextFileReader(tmpfilename, refresh_file_spec=True)

            for expected in SAMPLE_DATA[target]:
                self.assertEqual(expected, reader.read())
Пример #18
0
    def test_seek_end_negative_offset(self):
        """Seeking backwards relative to the end of the data."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                create_file('/'.join((tmpdirname, name)), SAMPLE_DATA[name])
                expected_lines += SAMPLE_DATA[name]

            reader = TextFileReader(tmpdirname + '/f*')

            # Each step: offset from the end, the position seek() should
            # report, and the index of the line the next read() should
            # return (None meaning the reader should report no more data).
            # The last step uses a bigger offset, so the reader has to go
            # back a couple of files.
            steps = (
                (-2, 7, 7),
                (0, 9, None),
                (-7, 2, 2),
            )
            for offset, position, line_index in steps:
                self.assertEqual(position, reader.seek(offset, 'end'))
                if line_index is None:
                    self.assertEqual(None, reader.read())
                else:
                    self.assertEqual(expected_lines[line_index],
                                     reader.read())
    def test_all_files(self):
        """Interleaved reads across files, with transforms applied in order.

        Uses TextFileReader's 'interval' flag to make sure reads interleave
        the way we expect.
        """
        readers = [TextFileReader(tmpfilename, interval=0.2)
                   for tmpfilename in self.tmpfilenames]

        first_prefix = PrefixTransform('prefix_1')
        second_prefix = PrefixTransform('prefix_2')

        reader = ComposedReader(readers, [first_prefix, second_prefix])

        # Group the expected output into "rounds": round i holds line i of
        # every file that has one, with both prefixes applied.
        filenames = sorted(SAMPLE_DATA)
        longest = max((len(SAMPLE_DATA[f]) for f in filenames), default=0)
        expected_lines = []
        for i in range(longest):
            round_lines = ['prefix_2 prefix_1 ' + SAMPLE_DATA[f][i]
                           for f in filenames
                           if i < len(SAMPLE_DATA[f])]
            expected_lines.append(round_lines)
        logging.debug('Expected lines %s', expected_lines)

        # Next line from each of the files can come in arbitrary order,
        # but within the file, lines should arrive in order, and we
        # should receive first line from each file before we receive
        # next line from any of them.
        for pending in expected_lines:
            while pending:
                record = reader.read()
                logging.info('read: %s; expected one of: %s', record,
                             pending)
                self.assertTrue(record in pending)
                pending.remove(record)

        # Everything has been consumed, so the reader reports None.
        self.assertEqual(None, reader.read())
Пример #20
0
    def test_seek_before_beginning(self):
        """Seeking to before the first record should raise ValueError."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                contents = SAMPLE_DATA[name]
                create_file(tmpdirname + '/' + name, contents)
                expected_lines.extend(contents)

            reader = TextFileReader(tmpdirname + '/f*')

            # Both of these would land before the start of the data.
            out_of_bounds_seeks = ((-1, 'current'), (-10, 'end'))
            for offset, origin in out_of_bounds_seeks:
                with self.assertRaises(ValueError):
                    reader.seek(offset, origin)

            # check seek still works for in-bounds value
            position = reader.seek(-7, 'end')
            self.assertEqual(2, position)
            self.assertEqual(expected_lines[2], reader.read())
Пример #21
0
    def test_seek_origin(self):
        """seek() rejects unknown origins and defaults to 'current'."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                path = '/'.join((tmpdirname, name))
                create_file(path, SAMPLE_DATA[name])
                expected_lines += SAMPLE_DATA[name]

            reader = TextFileReader(tmpdirname + '/f*')

            # An origin that isn't recognized is an error.
            with self.assertRaises(ValueError):
                reader.seek(0, 'xyz')

            # Move to middle of file (so current position isn't start or end).
            reader.seek(4, 'start')

            # Check that seek with no origin is relative to the current location.
            new_position = reader.seek(2)
            self.assertEqual(6, new_position)
            self.assertEqual(expected_lines[6], reader.read())
Пример #22
0
    def test_read_one_write_all(self):
        """One reader feeding several writers: every output file should end
        up with exactly the lines of the source file."""
        reader = TextFileReader(self.tmpfilenames[0])

        outfilenames = [
            self.tmpdirname + '/' + f for f in ['f1_out', 'f2_out', 'f3_out']
        ]
        writers = [TextFileWriter(ofn) for ofn in outfilenames]

        listener = Listener(readers=reader, writers=writers)
        listener.run()

        # Compare each output file against the source as a whole. Checking
        # line by line only as far as the output goes would pass vacuously
        # for an empty or truncated file, and would raise IndexError
        # (instead of failing the assertion) for one with extra lines.
        expected_lines = list(SAMPLE_DATA['f1'])
        for ofn in outfilenames:
            with open(ofn, 'r') as f:
                written_lines = [line.rstrip() for line in f]
            self.assertEqual(expected_lines, written_lines)
Пример #23
0
    def test_read_range(self):
        """read_range() should behave like slicing the combined lines."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                contents = SAMPLE_DATA[name]
                create_file(tmpdirname + '/' + name, contents)
                expected_lines.extend(contents)

            reader = TextFileReader(tmpdirname + '/f*')

            # Each case: the slice we expect back, and the positional and
            # keyword arguments to hand to read_range(). The last case asks
            # for a stop well past the end of the data.
            cases = [
                (expected_lines[1:4], (1, 4), {}),
                (expected_lines[0:9], (0, 9), {}),
                (expected_lines[2:3], (), {'start': 2, 'stop': 3}),
                (expected_lines[2:], (), {'start': 2}),
                (expected_lines[:3], (), {'stop': 3}),
                (expected_lines[2:], (), {'start': 2, 'stop': 40}),
            ]
            for expected, args, kwargs in cases:
                self.assertEqual(expected,
                                 reader.read_range(*args, **kwargs))
Пример #24
0
    def test_basic(self):
        """TimeoutReader should emit its message once records stop coming.

        The wrapped TextFileReader tails the file, returning a record every
        0.5 seconds. The TimeoutReader (timeout=1) is expected to return
        its 'Timeout' message roughly 2.5 seconds after read() is called.
        """
        # Use the context-manager form so the directory is removed
        # deterministically rather than by the object's finalizer (which
        # emits a ResourceWarning).
        with tempfile.TemporaryDirectory() as temp_dir_name:
            test_file = temp_dir_name + '/test.txt'
            logging.info('creating temporary file "%s"', test_file)
            create_file(test_file, SAMPLE_DATA)

            # Create a reader to read it at 0.5 second intervals
            reader = TextFileReader(test_file, interval=0.5, tail=True)

            timeout_reader = TimeoutReader(reader, timeout=1,
                                           message='Timeout')

            # Our reader should do fine until it runs out of records; the
            # timeout message is expected about 2.5 seconds in. Use the
            # monotonic clock so a system clock adjustment can't skew the
            # measured duration.
            start_time = time.monotonic()
            record = timeout_reader.read()
            elapsed = time.monotonic() - start_time

            self.assertEqual(record, 'Timeout')
            self.assertAlmostEqual(elapsed, 2.5, delta=0.3)

            logging.info('Got timeout record "%s" after %g seconds', record,
                         elapsed)
Пример #25
0
 def test_compatible(self):
     """A TextFileWriter should accept input from a TextFileReader."""
     with tempfile.TemporaryDirectory() as tmpdirname:
         # Writer and reader are both pointed at the same path.
         path = tmpdirname + '/f'
         writer = TextFileWriter(path)
         reader = TextFileReader(path)

         accepted = writer.can_accept(reader)
         self.assertTrue(accepted)
Пример #26
0
    def test_seek_forward(self):
        """Forward seeks from 'start', 'end' and 'current'."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            logging.info('created temporary directory "%s"', tmpdirname)

            expected_lines = []
            for name in sorted(SAMPLE_DATA):
                create_file('/'.join((tmpdirname, name)), SAMPLE_DATA[name])
                expected_lines += SAMPLE_DATA[name]

            reader = TextFileReader(tmpdirname + '/f*')

            # Each step: the seek to perform, the position it should
            # report, and the indices of the lines the reads that follow
            # should return (None meaning the reader should report no more
            # data).
            steps = [
                (2, 'start', 2, [2, 3]),
                (0, 'end', 9, [None]),
                (1, 'start', 1, [1]),
                (1, 'current', 3, [3]),
                (0, 'current', 4, [4]),
                (2, 'current', 7, [7, 8, None]),
            ]
            for offset, origin, position, line_indices in steps:
                self.assertEqual(position, reader.seek(offset, origin))
                for line_index in line_indices:
                    if line_index is None:
                        self.assertEqual(None, reader.read())
                    else:
                        self.assertEqual(expected_lines[line_index],
                                         reader.read())
Пример #27
0
class LogfileReader(TimestampedReader):
    """
  Read lines from one or more text files. Sequentially open all
  files that match the file_spec (filebase + '*').

  Reading is delegated to a wrapped TextFileReader. The time-based
  methods (seek_time(), read_time_range()) expect every record to start
  with a timestamp that is separated from the rest of the record by a
  single space and that can be parsed using time_format.
  """

    ############################
    def __init__(self,
                 filebase=None,
                 tail=False,
                 refresh_file_spec=False,
                 retry_interval=0.1,
                 interval=0,
                 use_timestamps=False,
                 time_format=timestamp.TIME_FORMAT,
                 date_format=timestamp.DATE_FORMAT):
        """
    ```
    filebase     Possibly wildcarded string specifying files to be opened.
                 A '*' is appended to match the files' suffixes.
                 Special case: if filebase is None, read from stdin.

    tail         If False, return None upon reaching end of last file; if
                 True, block upon reaching EOF of last file and wait for
                 more records.

    refresh_file_spec
                 If True, refresh the search for matching filenames when
                 reaching last EOF to see if any new matching files have
                 appeared in the interim.

    retry_interval
                 If tail and/or refresh_file_spec are True, how long to
                 wait before looking to see if any new records or files
                 have shown up.

    interval
                 How long to sleep between returning records. In general
                 this should be zero except for debugging purposes.
                 May not be combined with use_timestamps.

    use_timestamps
                 If True, read() sleeps before returning each record so
                 that the time between returned records approximates the
                 difference between the records' timestamps.

    time_format
                 Format used to parse the timestamp at the start of each
                 record.

    date_format
                 Stored as self.date_format; not otherwise used by this
                 class.
    ```
    Note that the order in which files are opened will probably be in
    alphanumeric by filename, but this is not strictly enforced and
    depends on how glob returns them.

    Raises ValueError if both interval and use_timestamps are specified.
    """
        super().__init__(output_format=Text)

        if interval and use_timestamps:
            raise ValueError(
                'Can not specify both "interval" and "use_timestamps"')

        self.filebase = filebase
        self.use_timestamps = use_timestamps
        self.date_format = date_format
        self.time_format = time_format
        self.tail = tail
        self.refresh_file_spec = refresh_file_spec

        # If use_timestamps, we need to keep track of our last_read to
        # know how long to sleep
        self.last_timestamp = 0
        self.last_read = 0

        # Cached timestamp (msec) of the very first record; filled in
        # lazily by _get_first_msec_timestamp().
        self._first_msec_timestamp = None
        # Most recent record handed out by read() or consumed by
        # _read_until(); used as the time reference by seek_time().
        self.prev_record = None

        # If they give us a filebase, add wildcard to match its suffixes;
        # otherwise, we'll pass None on to TextFileReader so that it
        # uses stdin. NOTE: we should really use a pattern that
        # echoes timestamp.DATE_FORMAT, e.g.
        # DATE_FORMAT_WILDCARD = '????-??-??'
        self.file_spec = filebase + '*' if filebase else None
        self.reader = TextFileReader(file_spec=self.file_spec,
                                     tail=tail,
                                     refresh_file_spec=refresh_file_spec,
                                     retry_interval=retry_interval,
                                     interval=interval)

    ############################
    def read(self):
        """
    Return the next line in the file(s), or None if there are no more
    records (as opposed to '' if the next record is a blank line). To test
    EOF you'll need to test

      if record is None:
        no more records...

    rather than simply

      if not record:
        could be EOF or simply an empty next line

    If use_timestamps is True, records whose leading timestamp can not
    be parsed (including blank lines) are skipped with a warning, and
    the call sleeps as needed so that the spacing between returned
    records follows the spacing between their timestamps.
    """

        # NOTE: It feels like we should check here that the reader's
        # current file really does match our logfile name format...
        while True:
            record = self.reader.read()
            # Only None means we're out of records; an empty string is a
            # legitimate (blank) record and must not be mistaken for EOF.
            if record is None:
                return None

            # If we've got a record and we're not using timestamps, we're
            # done - just return it.
            if not self.use_timestamps:
                self.prev_record = record
                # We need this in case the next call is seek_time() or
                # read_time_range(). This is less expensive than parsing every
                # timestamp and keeping self.last_timestamp, but an
                # alternative might be to implement read_previous(), which
                # would be expensive but which could be called only when
                # actually needed.
                return record

            # If we are using timestamps, make sure we can parse the
            # timestamp off the front. If we can't, complain and try getting
            # the next record. An empty leading token (blank line or
            # leading space) can never be a timestamp, so don't even try.
            time_str = record.split(' ', 1)[0]
            if time_str:
                try:
                    ts = timestamp.timestamp(time_str,
                                             time_format=self.time_format)
                    break
                except ValueError:
                    pass
            # If, for some reason, the record is malformed, complain and
            # loop to try fetching the next record
            logging.warning('Unable to parse time string from record: %s',
                            record)

        # If here, we've got a record and a timestamp and are intending to
        # use it. Figure out how long we should sleep before returning it.
        desired_interval = ts - self.last_timestamp
        now = timestamp.timestamp()
        actual_interval = now - self.last_read
        logging.debug('Desired interval %f, actual %f; sleeping %f',
                      desired_interval, actual_interval,
                      max(0, desired_interval - actual_interval))
        time.sleep(max(0, desired_interval - actual_interval))

        self.last_timestamp = ts
        self.last_read = timestamp.timestamp()

        self.prev_record = record
        return record

    ############################
    def _read_until(self, desired_time_msec):
        """
    Advance the wrapped reader until the next record to be read is the
    first one whose timestamp (msec) is >= desired_time_msec, or until
    the records run out. Updates self.prev_record for each record
    consumed, including the matching one that is then pushed back.
    """
        while True:
            record = self.reader.read()
            if record is None:
                return
            self.prev_record = record
            if self._get_msec_timestamp(record) >= desired_time_msec:
                # Back up one record so the match is the next one read.
                self.reader.seek(-1, 'current')
                return

    ############################
    def _reset(self):
        """Reposition the wrapped reader at the first record."""
        self.reader.seek(0, 'start')

    ############################
    def _get_msec_timestamp(self, record):
        """
    Parse the leading space-delimited token of record using
    self.time_format and return it as a timestamp in milliseconds.
    """
        time_str = record.split(' ', 1)[0]
        return timestamp.timestamp(time_str,
                                   time_format=self.time_format) * 1000

    ############################
    def _peek_msec(self):
        """
    Return the timestamp (msec) of the next record without consuming
    it, or None if there is no next record.
    """
        record = self.reader.read()
        if record is None:
            return None
        self.reader.seek(-1, 'current')
        return self._get_msec_timestamp(record)

    ############################
    # Note: this will change the file position if necessary, and should not be used
    # except where that behavior is appropriate.
    def _get_first_msec_timestamp(self):
        """
    Return the timestamp (msec) of the first record, or None if there
    are no records. The value is cached after the first successful
    lookup; an uncached lookup rewinds to the start and reads one record.
    """
        if self._first_msec_timestamp is None:
            self._reset()
            record = self.reader.read()
            if record is None:
                return None
            self._first_msec_timestamp = self._get_msec_timestamp(record)
        return self._first_msec_timestamp

    ############################
    def seek_time(self, offset=0, origin='current'):
        """
    Behavior is intended to mimic file seek() behavior but with
    respect to timestamps.
    After calling this, the next record read will be the first record
    whose timestamp is the same as or later than the requested time;
    if no such record is found, it will read to the end.
    Exception: if the records are not in exact chronological order,
    records appearing before the current record but with a later
    timestamp might be missed.

    Args:
      offset: offset in msec relative to origin
      origin: 'start', 'current' or 'end'

    Returns:
      Requested time in msec, i.e. timestamp of (T0 + offset),
      where T0 = timestamp(first record) if origin = 'start'
               = timestamp(next record) if origin = 'current' and next record is not None
               = timestamp(last record) if origin = 'current' and next record is None
               = timestamp(last record) if origin = 'end'
      Returns None if no timestamps were found

    Raises:
      ValueError if reading from stdin (filebase is None), if origin is
      'end' while tail or refresh_file_spec is set, if origin is 'start'
      and offset is negative, or if origin is not a recognized value.
    """
        if self.filebase is None:
            raise ValueError('seek_time() not allowed on stdin')

        # TODO: Maybe these are OK, as long as 'end' is defined as the point where
        # read() returns None for the first time.
        if self.tail and origin == 'end':
            raise ValueError('tail=True incompatible with origin == "end"')
        if self.refresh_file_spec and origin == 'end':
            raise ValueError(
                'refresh_file_spec=True incompatible with origin == "end"')

        if origin == 'start':
            if offset < 0:
                raise ValueError("Can't back up past earliest record")
            first_timestamp = self._get_first_msec_timestamp()
            if first_timestamp is None:
                return None
            desired_time = first_timestamp + offset
            # Rewind unless the last record we saw is known to be
            # earlier than the target, in which case we can scan forward
            # from where we are.
            if self.prev_record is None:
                self._reset()
            else:
                prev_timestamp = self._get_msec_timestamp(self.prev_record)
                if prev_timestamp >= desired_time:
                    self._reset()
            self._read_until(desired_time)
            return desired_time

        elif origin == 'current':
            curr_timestamp = self._peek_msec()
            if curr_timestamp is None:
                # No next record: fall back on the last record we saw.
                # If there isn't one either, there is no time reference.
                if self.prev_record is None:
                    return None
                curr_timestamp = self._get_msec_timestamp(self.prev_record)
            desired_time = curr_timestamp + offset
            if offset == 0:
                return desired_time
            if offset < 0:
                # Target is earlier than here; rescan from the beginning.
                self._reset()
            self._read_until(desired_time)
            return desired_time

        elif origin == 'end':
            # Consume everything so prev_record ends up as the last record.
            while self.read() is not None:
                pass
            if self.prev_record is None:
                return None
            end_timestamp = self._get_msec_timestamp(self.prev_record)
            desired_time = end_timestamp + offset
            if offset < 0:
                self._reset()
                self._read_until(desired_time)
            return desired_time

        else:
            raise ValueError('Unknown origin value: "%s"' % origin)

    ############################
    # Read a range of records beginning with timestamp start
    # milliseconds, and ending *before* timestamp stop milliseconds.
    def read_time_range(self, start=None, stop=None):
        """
    Return a list of the records whose timestamps (msec) are >= start
    and < stop.

    Args:
      start: first timestamp (msec) to include; None means begin with
             the first record.
      stop:  timestamp (msec) at which to stop; None means read until
             there are no more records.

    Returns:
      List of records; empty if there are no records at all.

    Raises:
      ValueError if reading from stdin (filebase is None), or if stop is
      None while tail or refresh_file_spec is set. A start earlier than
      the first record's timestamp also raises ValueError (from
      seek_time()).
    """
        if self.filebase is None:
            raise ValueError('read_time_range() not allowed on stdin')

        # TODO: Is this needed? stop=None would be OK unless records are
        # being written faster than they're being read.
        if stop is None:
            if self.tail:
                raise ValueError('tail=True incompatible with stop=None')
            if self.refresh_file_spec:
                raise ValueError(
                    'refresh_file_spec=True incompatible with stop=None')

        if start is None:
            starting_offset = 0
        else:
            first_timestamp = self._get_first_msec_timestamp()
            if first_timestamp is None:
                # No records at all, so nothing can fall in the range.
                return []
            starting_offset = start - first_timestamp

        self.seek_time(starting_offset, 'start')
        records = []
        while True:
            record = self.read()
            if record is None:
                break
            # Compare against None explicitly so stop=0 is honored.
            if stop is not None and self._get_msec_timestamp(record) >= stop:
                break
            records.append(record)
        return records
Пример #28
0
    def test_seek_position_unchanged_after_error(self):
        """Verify that a seek() raising ValueError leaves the position as is.

        For each case the reader is first placed at a known position, a
        seek expected to raise ValueError is attempted, and the next
        read() must still return the record at the known position.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            logging.info('created temporary directory "%s"', tmp_dir)

            # Write the sample files and remember their lines in file order.
            lines = []
            for name in sorted(SAMPLE_DATA):
                contents = SAMPLE_DATA[name]
                create_file(tmp_dir + '/' + name, contents)
                lines.extend(contents)

            reader = TextFileReader(tmp_dir + '/f*')

            # (known position, offset of failing seek, origin of failing seek)
            cases = [
                (5, -8, 'current'),
                (2, -1, 'start'),
                (7, -10, 'end'),
            ]
            for position, bad_offset, bad_origin in cases:
                reader.seek(position, 'start')
                with self.assertRaises(ValueError):
                    reader.seek(bad_offset, bad_origin)
                self.assertEqual(lines[position], reader.read())
Пример #29
0
    def test_seek_multiple(self):
        """Chain forward and backward seeks from all origins and check reads.

        Every step performs one seek(), checks the position it reports,
        and then checks the records (or None once the data is exhausted)
        returned by the subsequent read() calls.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            logging.info('created temporary directory "%s"', tmp_dir)

            # Write the sample files and remember their lines in file order.
            lines = []
            for name in sorted(SAMPLE_DATA):
                contents = SAMPLE_DATA[name]
                create_file(tmp_dir + '/' + name, contents)
                lines.extend(contents)

            reader = TextFileReader(tmp_dir + '/f*')

            # Each step: (offset, origin, position seek() should report,
            # indices of the records the following reads should return).
            # An index of None means read() should return None.
            steps = [
                (8, 'start', 8, [8]),
                (-4, 'current', 5, [5]),
                (-2, 'end', 7, [7, 8, None]),
                (-7, 'end', 2, [2]),
                (5, 'current', 8, [8]),
            ]
            for offset, origin, position, reads in steps:
                self.assertEqual(position, reader.seek(offset, origin))
                for index in reads:
                    wanted = None if index is None else lines[index]
                    self.assertEqual(wanted, reader.read())
Пример #30
0
    def test_position_after_read_range(self):
        """Check which record read() returns right after each read_range().

        After read_range(start, stop) the next read() is expected to
        return the record at index stop, or None when stop is past the
        last record.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            logging.info('created temporary directory "%s"', tmp_dir)

            # Write the sample files and remember their lines in file order.
            lines = []
            for name in sorted(SAMPLE_DATA):
                contents = SAMPLE_DATA[name]
                create_file(tmp_dir + '/' + name, contents)
                lines.extend(contents)

            reader = TextFileReader(tmp_dir + '/f*')

            # (start, stop, index of record the next read() should return);
            # an index of None means read() should return None.
            cases = [
                (1, 6, 6),
                (0, 4, 4),
                (7, 9, None),
            ]
            for start, stop, next_index in cases:
                reader.read_range(start, stop)
                wanted = None if next_index is None else lines[next_index]
                self.assertEqual(wanted, reader.read())