def __init__(self, filebase=None, tail=False, refresh_file_spec=False,
             retry_interval=0.1, interval=0, use_timestamps=False,
             date_format=timestamp.DATE_FORMAT):
    """Set up a reader over every file whose name starts with filebase.

    filebase     Prefix of the files to be opened; a trailing '*' wildcard
                 is appended to it to form the file spec handed to
                 TextFileReader. Special case: if filebase is None (or
                 empty), a file spec of None is passed on, which per the
                 original notes makes TextFileReader read from stdin.

    tail         If False, return None upon reaching end of last file; if
                 True, block upon reaching EOF of last file and wait for
                 more records.

    refresh_file_spec
                 If True, refresh the search for matching filenames when
                 reaching last EOF to see if any new matching files have
                 appeared in the interim.

    retry_interval
                 If tail and/or refresh_file_spec are True, how long to
                 wait before looking to see if any new records or files
                 have shown up.

    interval     How long to sleep between returning records. In general
                 this should be zero except for debugging purposes.

    use_timestamps
                 Stored on the instance; may not be combined with a
                 non-zero interval.

    date_format  Stored on the instance as self.date_format.

    Raises ValueError if both interval and use_timestamps are set.

    Note that the order in which files are opened will probably be
    alphanumeric by filename, but this is not strictly enforced and
    depends on how glob returns them.
    """
    super().__init__(output_format=Text)

    # The two pacing mechanisms are mutually exclusive.
    if use_timestamps and interval:
        raise ValueError(
            'Can not specify both "interval" and "use_timestamps"')

    self.filebase = filebase
    self.date_format = date_format
    self.use_timestamps = use_timestamps

    # Bookkeeping used when pacing by timestamps: the timestamp of the
    # last record and the time at which it was read.
    self.last_read = 0
    self.last_timestamp = 0

    # Add a wildcard so that every suffix of filebase matches. NOTE: a
    # pattern echoing timestamp.DATE_FORMAT (e.g. '????-??-??') would be
    # more precise than a bare '*'.
    if filebase:
        self.file_spec = filebase + '*'
    else:
        self.file_spec = None

    reader_kwargs = {
        'file_spec': self.file_spec,
        'tail': tail,
        'refresh_file_spec': refresh_file_spec,
        'retry_interval': retry_interval,
        'interval': interval,
    }
    self.reader = TextFileReader(**reader_kwargs)
def test_basic(self):
    # Run a LoggerRunner in a background thread, switch modes, and check
    # that the 'on' mode writes SAMPLE_DATA to the destination file.
    runner = LoggerRunner(self.config, interval=0.1)
    worker = threading.Thread(target=runner.run)
    worker.start()
    time.sleep(0.2)

    # Nothing should have been written yet, and the runner should be in
    # its default mode.
    dest_exists = os.path.exists(self.dest_name)
    self.assertFalse(dest_exists)
    self.assertEqual(runner.mode, "off")

    # An unknown mode must log a warning and leave the mode untouched.
    root_logger = logging.getLogger()
    with self.assertLogs(root_logger, logging.WARNING):
        runner.set_mode('nonexistent mode')
    self.assertEqual(runner.mode, "off")

    runner.set_mode('on')
    self.assertEqual(runner.mode, "on")
    time.sleep(0.6)

    dest_reader = TextFileReader(self.dest_name)
    for expected in SAMPLE_DATA:
        logging.info('Checking line: "%s"', expected)
        self.assertEqual(expected, dest_reader.read())

    self.assertTrue(runner.processes['logger'].is_alive())
    pid = runner.processes['logger'].pid

    # Try shutting down
    runner.quit()
    time.sleep(0.2)
def test_basic(self):
    # The destination file must not exist before the runner starts.
    dest_exists = os.path.exists(self.dest_name)
    self.assertFalse(dest_exists)

    runner = LoggerRunner(config=self.config)
    runner.start()
    time.sleep(1.0)

    # Everything in SAMPLE_DATA should have reached the destination file.
    dest_reader = TextFileReader(self.dest_name)
    for expected in SAMPLE_DATA:
        actual = dest_reader.read()
        logging.info('Checking line: "%s"', expected)
        logging.info('Against line: "%s"', actual)
        self.assertEqual(expected, actual)

    # A configured runner is runnable, alive, and not failed ...
    self.assertTrue(runner.is_runnable())
    self.assertTrue(runner.is_alive())
    self.assertFalse(runner.is_failed())

    # ... until it is told to quit.
    runner.quit()
    self.assertFalse(runner.is_alive())

    # Try a degenerate runner: an empty config should yield a runner
    # that is neither runnable, alive, nor failed.
    runner = LoggerRunner(config={})
    runner.start()
    time.sleep(1.0)
    for probe in (runner.is_runnable, runner.is_alive, runner.is_failed):
        self.assertFalse(probe())
def test_check_format(self):
    # With check_format=True, building the Listener is expected both to
    # log a warning and to raise ValueError (the original note says the
    # warning is about check_format not being implemented for
    # ComposedWriter).
    root_logger = logging.getLogger()
    with self.assertLogs(root_logger, logging.WARNING), \
            self.assertRaises(ValueError):
        first_reader = TextFileReader(self.tmpfilenames[0])
        second_reader = TextFileReader(self.tmpfilenames[1])
        Listener([first_reader, second_reader], check_format=True)
def test_all_files(self):
    # Write every SAMPLE_DATA file into a temp dir, then read them all
    # back through one wildcarded TextFileReader.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            contents = SAMPLE_DATA[name]
            create_file(tmpdirname + '/' + name, contents)
            all_lines += contents

        reader = TextFileReader(tmpdirname + '/f*')
        for expected in all_lines:
            actual = reader.read()
            self.assertEqual(expected, actual)

        # Once every line has been consumed the reader reports EOF.
        self.assertEqual(None, reader.read())
def test_basic(self):
    # The destination file must not exist before anything is started.
    self.assertFalse(os.path.exists(self.dest_name))

    runner = LoggerRunner(interval=0.1)
    worker = threading.Thread(target=runner.run, daemon=True)
    worker.start()

    # Switch on the 'on' mode configs and give the logger time to run.
    runner.set_configs(self.config['modes']['on'])
    time.sleep(1.0)

    dest_reader = TextFileReader(self.dest_name)
    for expected in SAMPLE_DATA:
        actual = dest_reader.read()
        logging.info('Checking line: "%s"', expected)
        logging.info('Against line: "%s"', actual)
        self.assertEqual(expected, actual)

    self.assertTrue(runner.logger_is_alive('logger'))
    pid = runner.processes['logger'].pid

    # While running, the status report carries the live process's pid.
    running_status = {
        'logger': {
            'config': 'unknown',
            'errors': [],
            'running': True,
            'pid': pid,
            'failed': False,
        }
    }
    self.assertDictEqual(runner.check_loggers(), running_status)

    # After switching to the 'off' mode configs the logger entry is
    # reported with no config, no pid and no running state.
    runner.set_configs(self.config['modes']['off'])
    time.sleep(0.1)
    stopped_status = {
        'logger': {
            'config': None,
            'errors': [],
            'running': None,
            'pid': None,
            'failed': False,
        }
    }
    self.assertDictEqual(runner.check_loggers(), stopped_status)

    # NOTE: a check that the process behind `pid` is really gone
    # (os.kill(pid, 0) raising ProcessLookupError) is currently disabled.

    # Try shutting down
    runner.quit()
    worker.join(2.0)
    self.assertFalse(worker.is_alive())
def test_read_range_after_read(self):
    # read_range() must return the requested slice even when some
    # records have already been consumed with read().
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            path = tmpdirname + '/' + name
            create_file(path, SAMPLE_DATA[name])
            all_lines.extend(SAMPLE_DATA[name])

        reader = TextFileReader(tmpdirname + '/f*')

        # Advance the reader five records before asking for the range.
        for _ in range(5):
            reader.read()

        wanted = all_lines[1:4]
        self.assertEqual(wanted, reader.read_range(1, 4))
def test_check_format(self):
    # Two TextFileReaders share a format, so this should be okay.
    compatible_readers = [
        TextFileReader(self.tmpfilenames[0]),
        TextFileReader(self.tmpfilenames[1]),
    ]
    ComposedReader(compatible_readers, check_format=True)

    # This should not be - no common reader format between a
    # TextFileReader and a bare Reader.
    with self.assertRaises(ValueError):
        mismatched_readers = [TextFileReader(self.tmpfilenames[0]), Reader()]
        ComposedReader(mismatched_readers, check_format=True)
def test_empty_lines_okay(self):
    # Check that, with empty_is_okay=True, TimeoutReader only reports a
    # timeout once the underlying reader stops producing records.
    #
    # The temporary directory is managed by a `with` block so that it is
    # removed deterministically when the test finishes (or fails),
    # instead of being left to the TemporaryDirectory finalizer.
    with tempfile.TemporaryDirectory() as temp_dir_name:
        # Create a file
        test_file = temp_dir_name + '/test.txt'
        logging.info('creating temporary file "%s"', test_file)
        create_file(test_file, SAMPLE_SPACE_DATA)

        # Create a reader to read it at 0.5 second intervals
        reader = TextFileReader(test_file, interval=0.5, tail=True)
        timeout_reader = TimeoutReader(reader, timeout=1,
                                       empty_is_okay=True,
                                       message='Space Timeout')

        # Our reader should do fine until it gets to the very end; the
        # assertion below expects the timeout record after roughly
        # 3.5 seconds.
        start_time = time.time()
        record = timeout_reader.read()
        end_time = time.time()

        self.assertEqual(record, 'Space Timeout')
        self.assertAlmostEqual(end_time - start_time, 3.5, delta=0.3)
        logging.info('Got timeout record "%s" after %g seconds',
                     record, end_time - start_time)
def test_read_all_write_one(self):
    # Several readers feed two prefix transforms and a single file
    # writer; the output file must hold every source line, prefixed.
    readers = [TextFileReader(name, interval=0.2)
               for name in self.tmpfilenames]
    transforms = [PrefixTransform('prefix_1'), PrefixTransform('prefix_2')]

    outfilename = self.tmpdirname + '/f_out'
    writers = [TextFileWriter(outfilename)]

    Listener(readers, transforms, writers).run()

    # Arrival order across readers is not fixed, so compare sorted lists.
    with open(outfilename, 'r') as outfile:
        out_lines = sorted(line.rstrip() for line in outfile.readlines())

    source_lines = sorted('prefix_2 prefix_1 ' + line
                          for key in SAMPLE_DATA
                          for line in SAMPLE_DATA[key])

    logging.debug('out: %s, source: %s', out_lines, source_lines)
    self.assertEqual(out_lines, source_lines)
def _stderr_file_to_cds(self, logger, stderr_file_name):
    """Forward a logger's stderr file to the cached data server.

    Iteratively read from a file (presumed to be a logger's stderr
    file) and send the lines to a cached data server, labeled as coming
    from stderr:logger:<logger>.

    Each forwarded message is a JSON-encoded dict of asctime, levelno,
    levelname, filename, lineno and message.

    To be run in a separate thread from _check_logger_stderr_loop.

    Args:
      logger:            name of the logger; used to build the field
                         name 'stderr:logger:<logger>'.
      stderr_file_name:  path of the stderr file to follow.

    Returns when self.quit_flag becomes true, or immediately (after
    logging an error) if no data server writer is defined. Lines that
    do not match the expected stderr format are logged as warnings and
    skipped.
    """
    if not self.data_server_writer:
        logging.error(
            'INTERNAL ERROR: called _stderr_file_to_cds(), but no '
            'cached data server defined?!?')
        return

    field_name = 'stderr:logger:' + logger
    message_format = ('{ascdate:S} {asctime:S} {levelno:d} {levelname:w} '
                      '{filename:w}.py:{lineno:d} {message}')

    # Our caller checked that this file exists, so open with impunity.
    reader = TextFileReader(file_spec=stderr_file_name, tail=True)
    while not self.quit_flag:
        record = reader.read()
        if not record:
            # Nothing to forward (no record, or a blank line).
            continue

        # parse.parse() returns None, not an empty result, when the line
        # does not match the format. Check for that explicitly:
        # subscripting None raises TypeError, which the KeyError handler
        # below would not catch, and a single stray stderr line would
        # kill this thread.
        parsed_fields = parse.parse(message_format, record)
        if parsed_fields is None:
            logging.warning('Couldn\'t parse stderr message: %s', record)
            continue

        try:
            fields = {
                'asctime': (parsed_fields['ascdate'] + 'T' +
                            parsed_fields['asctime']),
                'levelno': parsed_fields['levelno'],
                'levelname': parsed_fields['levelname'],
                'filename': parsed_fields['filename'] + '.py',
                'lineno': parsed_fields['lineno'],
                'message': parsed_fields['message']
            }
            das_record = DASRecord(fields={field_name: json.dumps(fields)})
            self.data_server_writer.write(das_record)
        except KeyError:
            logging.warning('Couldn\'t parse stderr message: %s', record)
def test_tail_false(self):
    # Without 'tail', a reader opened before any data has been written
    # should report no data at all.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        # Create a file slowly, one line at a time, in a background
        # thread (0.25 is passed on to create_file - presumably the
        # per-line interval).
        target = 'f1'
        tmpfilename = tmpdirname + '/' + target
        writer_thread = threading.Thread(
            target=create_file,
            args=(tmpfilename, SAMPLE_DATA[target], 0.25))
        writer_thread.start()
        time.sleep(0.05)  # let the thread get started

        # Read right away: nothing should be available yet.
        reader = TextFileReader(tmpfilename, tail=False)
        first_record = reader.read()
        self.assertEqual(None, first_record)
def run_commands(logger_manager):
    # Drive the logger manager through its command-line API: load the
    # cruise configuration, activate the 'port' mode, verify the first
    # four output records, then tell the manager to quit.
    api = logger_manager.api
    time.sleep(1)

    command_line = ServerAPICommandLine(api)
    for command in ('load_configuration %s' % self.cruise_filename,
                    'set_active_mode port'):
        command_line.process_command(command)

    # Give it time to run
    time.sleep(2)

    reader = TextFileReader(self.output_filename)
    for index in range(4):
        actual = reader.read()
        expected = 'TestLoggerManager ' + sample_data[index]
        self.assertEqual(actual, expected)
    logging.info('TextFileReader done')

    logger_manager.quit()
def test_tail_true(self):
    # Same setup as test_tail_false, but with tail=True: the reader
    # should now deliver every line that is eventually written.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        # Create a file slowly, one line at a time, in a background
        # thread (0.25 is passed on to create_file - presumably the
        # per-line interval).
        target = 'f1'
        tmpfilename = tmpdirname + '/' + target
        writer_thread = threading.Thread(
            target=create_file,
            args=(tmpfilename, SAMPLE_DATA[target], 0.25))
        writer_thread.start()
        time.sleep(0.05)  # let the thread get started

        # Read, and wait for lines to come
        reader = TextFileReader(tmpfilename, tail=True)
        for expected in SAMPLE_DATA[target]:
            actual = reader.read()
            self.assertEqual(expected, actual)
def test_formats(self):
    # output_format() acts as a combined getter/setter.
    reader = TextFileReader(file_spec=None)

    # The default format is Text.
    current_format = reader.output_format()
    self.assertEqual(current_format, formats.Text)

    # Setting a format returns it, and it sticks for later queries.
    returned_format = reader.output_format(formats.NMEA)
    self.assertEqual(returned_format, formats.NMEA)
    self.assertEqual(reader.output_format(), formats.NMEA)

    # Anything that is not a format is rejected.
    with self.assertRaises(TypeError):
        reader.output_format('not a format')
def test_seek_current_negative_offset(self):
    # Seeking backwards relative to the current position, including
    # across file boundaries.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            contents = SAMPLE_DATA[name]
            create_file(tmpdirname + '/' + name, contents)
            all_lines.extend(contents)

        reader = TextFileReader(tmpdirname + '/f*')

        # Consume five records, then step back two: position 3.
        for _ in range(5):
            reader.read()
        position = reader.seek(-2, 'current')
        self.assertEqual(3, position)
        self.assertEqual(all_lines[3], reader.read())

        # Now try a bigger offset, so we have to go back a couple files.
        reader.seek(8, 'start')
        position = reader.seek(-6, 'current')
        self.assertEqual(2, position)
        self.assertEqual(all_lines[2], reader.read())
def test_refresh_file_spec(self):
    # Delay creation of the file, but tell the reader to keep checking
    # for new files.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        # Create a file slowly, one line at a time, and delay even
        # creating the file so that when our TextFileReader starts, its
        # file_spec matches nothing. (0.25 and 0.5 are passed on to
        # create_file - presumably the per-line interval and the initial
        # delay.)
        target = 'f1'
        tmpfilename = tmpdirname + '/' + target
        writer_args = (tmpfilename, SAMPLE_DATA[target], 0.25, 0.5)
        writer_thread = threading.Thread(target=create_file,
                                         args=writer_args)
        writer_thread.start()
        time.sleep(0.05)  # let the thread get started

        # A warning is expected while the reader is created and read
        # from; all lines must still arrive in order.
        root_logger = logging.getLogger()
        with self.assertLogs(root_logger, logging.WARNING):
            reader = TextFileReader(tmpfilename, refresh_file_spec=True)
            for expected in SAMPLE_DATA[target]:
                actual = reader.read()
                self.assertEqual(expected, actual)
def test_seek_end_negative_offset(self):
    # Seeking relative to the end of the concatenated files.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            path = tmpdirname + '/' + name
            create_file(path, SAMPLE_DATA[name])
            all_lines += SAMPLE_DATA[name]

        reader = TextFileReader(tmpdirname + '/f*')

        # Two back from the end is record 7.
        position = reader.seek(-2, 'end')
        self.assertEqual(7, position)
        self.assertEqual(all_lines[7], reader.read())

        # The end itself is position 9, and there is nothing to read.
        position = reader.seek(0, 'end')
        self.assertEqual(9, position)
        self.assertEqual(None, reader.read())

        # Now try a bigger offset, so we have to go back a couple files.
        position = reader.seek(-7, 'end')
        self.assertEqual(2, position)
        self.assertEqual(all_lines[2], reader.read())
def test_all_files(self):
    # Use TextFileReader's 'interval' flag to make sure we interleave
    # reads the way we expect. Also make sure transforms get applied
    # in proper order.
    readers = [TextFileReader(name, interval=0.2)
               for name in self.tmpfilenames]

    prefix_1 = PrefixTransform('prefix_1')
    prefix_2 = PrefixTransform('prefix_2')
    reader = ComposedReader(readers, [prefix_1, prefix_2])

    # Build the expected output as a list of "rounds": round k holds the
    # (prefixed) k-th line of every file that has at least k+1 lines.
    names = sorted(SAMPLE_DATA)
    expected_lines = []
    row = 0
    while True:
        round_lines = ['prefix_2 prefix_1 ' + SAMPLE_DATA[name][row]
                       for name in names
                       if row < len(SAMPLE_DATA[name])]
        if not round_lines:
            break
        expected_lines.append(round_lines)
        row += 1

    logging.debug('Expected lines %s', expected_lines)

    # Next line from each of the files can come in arbitrary order,
    # but within the file, lines should arrive in order, and we
    # should receive first line from each file before we receive
    # next line from any of them.
    while expected_lines:
        pending = expected_lines.pop(0)
        while pending:
            record = reader.read()
            logging.info('read: %s; expected one of: %s', record, pending)
            self.assertTrue(record in pending)
            # The assertion above guarantees membership at this point.
            pending.remove(record)

    self.assertEqual(None, reader.read())
def test_seek_before_beginning(self):
    # Seeking to a position before the first record must raise, and must
    # not prevent later in-bounds seeks.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            contents = SAMPLE_DATA[name]
            create_file(tmpdirname + '/' + name, contents)
            all_lines += contents

        reader = TextFileReader(tmpdirname + '/f*')

        # Both of these would land before the first record.
        for offset, origin in ((-1, 'current'), (-10, 'end')):
            with self.assertRaises(ValueError):
                reader.seek(offset, origin)

        # check seek still works for in-bounds value
        position = reader.seek(-7, 'end')
        self.assertEqual(2, position)
        self.assertEqual(all_lines[2], reader.read())
def test_seek_origin(self):
    # Unknown origins are rejected, and an omitted origin means
    # 'relative to the current position'.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            path = tmpdirname + '/' + name
            create_file(path, SAMPLE_DATA[name])
            all_lines.extend(SAMPLE_DATA[name])

        reader = TextFileReader(tmpdirname + '/f*')

        with self.assertRaises(ValueError):
            reader.seek(0, 'xyz')

        # Move to middle of file (so current position isn't start or end).
        reader.seek(4, 'start')

        # Check that seek with no origin is relative to the current
        # location.
        position = reader.seek(2)
        self.assertEqual(6, position)
        self.assertEqual(all_lines[6], reader.read())
def test_read_one_write_all(self):
    # One reader fans out to three file writers; each output file must
    # reproduce the 'f1' sample data line for line.
    readers = TextFileReader(self.tmpfilenames[0])

    outfilenames = []
    for suffix in ['f1_out', 'f2_out', 'f3_out']:
        outfilenames.append(self.tmpdirname + '/' + suffix)
    writers = [TextFileWriter(path) for path in outfilenames]

    listener = Listener(readers=readers, writers=writers)
    listener.run()

    for path in outfilenames:
        with open(path, 'r') as outfile:
            for line_num, line in enumerate(outfile.readlines()):
                expected = SAMPLE_DATA['f1'][line_num]
                self.assertEqual(expected, line.rstrip())
def test_read_range(self):
    # read_range(start, stop) should behave like slicing the
    # concatenation of all files' lines.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            contents = SAMPLE_DATA[name]
            create_file(tmpdirname + '/' + name, contents)
            all_lines.extend(contents)

        reader = TextFileReader(tmpdirname + '/f*')

        # Positional bounds.
        self.assertEqual(all_lines[1:4], reader.read_range(1, 4))
        self.assertEqual(all_lines[0:9], reader.read_range(0, 9))

        # Keyword bounds, including open-ended ranges.
        got = reader.read_range(start=2, stop=3)
        self.assertEqual(all_lines[2:3], got)
        got = reader.read_range(start=2)
        self.assertEqual(all_lines[2:], got)
        got = reader.read_range(stop=3)
        self.assertEqual(all_lines[:3], got)

        # A stop beyond the end just reads to the end.
        got = reader.read_range(start=2, stop=40)
        self.assertEqual(all_lines[2:], got)
def test_basic(self):
    # Check that TimeoutReader returns its timeout message once the
    # underlying reader stops producing records.
    #
    # The temporary directory is managed by a `with` block so that it is
    # removed deterministically when the test finishes (or fails),
    # instead of being left to the TemporaryDirectory finalizer.
    with tempfile.TemporaryDirectory() as temp_dir_name:
        # Create a file
        test_file = temp_dir_name + '/test.txt'
        logging.info('creating temporary file "%s"', test_file)
        create_file(test_file, SAMPLE_DATA)

        # Create a reader to read it at 0.5 second intervals
        reader = TextFileReader(test_file, interval=0.5, tail=True)
        timeout_reader = TimeoutReader(reader, timeout=1, message='Timeout')

        # Our reader should do fine until it runs out of records; the
        # assertion below expects the timeout record after roughly
        # 2.5 seconds.
        start_time = time.time()
        record = timeout_reader.read()
        end_time = time.time()

        self.assertEqual(record, 'Timeout')
        self.assertAlmostEqual(end_time - start_time, 2.5, delta=0.3)
        logging.info('Got timeout record "%s" after %g seconds',
                     record, end_time - start_time)
def test_compatible(self):
    # A TextFileWriter should accept what a TextFileReader produces.
    with tempfile.TemporaryDirectory() as tmpdirname:
        shared_path = tmpdirname + '/f'
        writer = TextFileWriter(shared_path)
        reader = TextFileReader(shared_path)
        accepted = writer.can_accept(reader)
        self.assertTrue(accepted)
def test_seek_forward(self):
    # Forward seeks from each origin, interleaved with reads.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            contents = SAMPLE_DATA[name]
            create_file(tmpdirname + '/' + name, contents)
            all_lines += contents

        reader = TextFileReader(tmpdirname + '/f*')

        def check_seek(expected_position, offset, origin):
            # seek() must report the absolute position it moved to.
            self.assertEqual(expected_position, reader.seek(offset, origin))

        def check_read(expected_record):
            self.assertEqual(expected_record, reader.read())

        check_seek(2, 2, 'start')
        check_read(all_lines[2])
        check_read(all_lines[3])

        check_seek(9, 0, 'end')
        check_read(None)

        check_seek(1, 1, 'start')
        check_read(all_lines[1])

        # After reading record 1 we sit at 2; skipping one lands on 3.
        check_seek(3, 1, 'current')
        check_read(all_lines[3])

        # A zero offset leaves the position where it is.
        check_seek(4, 0, 'current')
        check_read(all_lines[4])

        check_seek(7, 2, 'current')
        check_read(all_lines[7])
        check_read(all_lines[8])
        check_read(None)
class LogfileReader(TimestampedReader):
    """
    Read lines from one or more text files. Sequentially open all
    files that match the file_spec.

    Reading is delegated to a TextFileReader. On top of that, this class
    can pace records according to the timestamp at the front of each
    record (use_timestamps=True), and can seek / read ranges by
    timestamp (seek_time(), read_time_range()).
    """
    ############################

    def __init__(self, filebase=None, tail=False, refresh_file_spec=False,
                 retry_interval=0.1, interval=0, use_timestamps=False,
                 time_format=timestamp.TIME_FORMAT,
                 date_format=timestamp.DATE_FORMAT):
        """
        ```
        filebase     Possibly wildcarded string specifying files to be
                     opened; a '*' is appended to it to match suffixes.
                     Special case: if filebase is None, read from stdin.

        tail         If False, return None upon reaching end of last file;
                     if True, block upon reaching EOF of last file and wait
                     for more records.

        refresh_file_spec
                     If True, refresh the search for matching filenames when
                     reaching last EOF to see if any new matching files have
                     appeared in the interim.

        retry_interval
                     If tail and/or refresh_file_spec are True, how long to
                     wait before looking to see if any new records or files
                     have shown up.

        interval
                     How long to sleep between returning records. In general
                     this should be zero except for debugging purposes.

        use_timestamps
                     If True, read() paces records according to the
                     timestamps at the front of each record. May not be
                     combined with a non-zero interval.

        time_format  Format used to parse the leading timestamp of a
                     record.

        date_format  Stored on the instance; not otherwise used by this
                     class.
        ```
        Raises ValueError if both interval and use_timestamps are set.

        Note that the order in which files are opened will probably be in
        alphanumeric by filename, but this is not strictly enforced and
        depends on how glob returns them.
        """
        super().__init__(output_format=Text)

        if interval and use_timestamps:
            raise ValueError(
                'Can not specify both "interval" and "use_timestamps"')

        self.filebase = filebase
        self.use_timestamps = use_timestamps
        self.date_format = date_format
        self.time_format = time_format
        self.tail = tail
        self.refresh_file_spec = refresh_file_spec

        # If use_timestamps, we need to keep track of our last_read to
        # know how long to sleep
        self.last_timestamp = 0
        self.last_read = 0

        # Cache of the first record's timestamp (msec); filled lazily by
        # _get_first_msec_timestamp().
        self._first_msec_timestamp = None

        # Most recent record seen by read() / _read_until(); seek_time()
        # relies on it when there is no next record to peek at.
        self.prev_record = None

        # If they give us a filebase, add wildcard to match its suffixes;
        # otherwise, we'll pass None on to TextFileReader so that it uses
        # stdin. NOTE: we should really use a pattern that echoes
        # timestamp.DATE_FORMAT, e.g.
        # DATE_FORMAT_WILDCARD = '????-??-??'
        self.file_spec = filebase + '*' if filebase else None
        self.reader = TextFileReader(file_spec=self.file_spec,
                                     tail=tail,
                                     refresh_file_spec=refresh_file_spec,
                                     retry_interval=retry_interval,
                                     interval=interval)

    ############################
    def read(self):
        """
        Return the next line in the file(s), or None if there are no more
        records (as opposed to '' if the next record is a blank line).

        To test EOF you'll need to test

          if record is None:
            no more records...

        rather than simply

          if not record:
            could be EOF or simply an empty next line

        If use_timestamps is set, records whose leading timestamp cannot
        be parsed (including blank lines) are logged and skipped, and the
        call sleeps as needed so that records are returned at the pace
        given by their timestamps.
        """
        # NOTE: It feels like we should check here that the reader's
        # current file really does match our logfile name format...
        while True:
            record = self.reader.read()

            # Only None means we're out of records. An empty string is a
            # blank line and must not be mistaken for EOF.
            if record is None:
                return None

            # If we've got a record and we're not using timestamps, we're
            # done - just return it.
            if not self.use_timestamps:
                # We need prev_record in case the next call is seek_time()
                # or read_time_range(). This is less expensive than
                # parsing every timestamp and keeping self.last_timestamp,
                # but an alternative might be to implement
                # read_previous(), which would be expensive but which
                # could be called only when actually needed.
                self.prev_record = record
                return record

            # If we are using timestamps, make sure we can parse the
            # timestamp off the front. If we can't, complain and try
            # getting the next record.
            ts = self._parse_leading_timestamp(record)
            if ts is not None:
                break
            logging.warning('Unable to parse time string from record: %s',
                            record)

        # If here, we've got a record and a timestamp and are intending to
        # use it. Figure out how long we should sleep before returning it:
        # the gap between this record's timestamp and the previous one,
        # minus the wall-clock time that has already elapsed.
        desired_interval = ts - self.last_timestamp
        now = timestamp.timestamp()
        actual_interval = now - self.last_read
        sleep_time = max(0, desired_interval - actual_interval)
        logging.debug('Desired interval %f, actual %f; sleeping %f',
                      desired_interval, actual_interval, sleep_time)
        time.sleep(sleep_time)

        self.last_timestamp = ts
        self.last_read = timestamp.timestamp()

        self.prev_record = record
        return record

    ############################
    def _parse_leading_timestamp(self, record):
        """Return the timestamp at the front of record, or None if the
        record has no parseable leading timestamp."""
        time_str = record.split(' ', 1)[0]
        # Reject an empty leading token here rather than handing it to
        # timestamp.timestamp(), whose treatment of an empty string is not
        # visible from this class.
        if not time_str:
            return None
        try:
            return timestamp.timestamp(time_str,
                                       time_format=self.time_format)
        except ValueError:
            return None

    ############################
    def _read_until(self, desired_time_msec):
        """Advance the underlying reader until the next record to be read
        is the first one whose timestamp is >= desired_time_msec, or until
        there are no more records."""
        while True:
            record = self.reader.read()
            if record is None:
                return
            self.prev_record = record
            if self._get_msec_timestamp(record) >= desired_time_msec:
                # Step back so that this record is the next one read.
                self.reader.seek(-1, 'current')
                return

    ############################
    def _reset(self):
        """Rewind the underlying reader to the first record."""
        self.reader.seek(0, 'start')

    ############################
    def _get_msec_timestamp(self, record):
        """Return the leading timestamp of record, in milliseconds."""
        time_str = record.split(' ', 1)[0]
        return timestamp.timestamp(time_str,
                                   time_format=self.time_format) * 1000

    ############################
    def _peek_msec(self):
        """Return the timestamp (msec) of the next record without
        consuming it, or None if there is no next record."""
        record = self.reader.read()
        if record is None:
            return None
        self.reader.seek(-1, 'current')
        return self._get_msec_timestamp(record)

    ############################
    # Note: this will change the file position if necessary, and should
    # not be used except where that behavior is appropriate.
    def _get_first_msec_timestamp(self):
        """Return the (cached) timestamp of the first record in msec, or
        None if there are no records."""
        if self._first_msec_timestamp is None:
            self._reset()
            record = self.reader.read()
            if record is None:
                return None
            self._first_msec_timestamp = self._get_msec_timestamp(record)
        return self._first_msec_timestamp

    ############################
    def seek_time(self, offset=0, origin='current'):
        """
        Behavior is intended to mimic file seek() behavior but with
        respect to timestamps.

        After calling this, the next record read will be the first
        record whose timestamp is the same as or later than the
        requested time; if no such record is found, it will read to the
        end.

        Exception: if the records are not in exact chronological order,
        records appearing before the current record but with a later
        timestamp might be missed.

        Args:
          offset: offset in msec relative to origin
          origin: 'start', 'current' or 'end'

        Returns:
          Requested time in msec, i.e. timestamp of (T0 + offset), where
            T0 = timestamp(first record) if origin = 'start'
               = timestamp(next record) if origin = 'current' and
                 next record is not None
               = timestamp(last record) if origin = 'current' and
                 next record is None
               = timestamp(last record) if origin = 'end'

          Returns None if no timestamps were found

        Raises:
          ValueError: if reading from stdin; if origin is 'end' while
            tail or refresh_file_spec is set; if origin is 'start' and
            offset is negative; or if origin is not recognized.
        """
        if self.filebase is None:
            raise ValueError('seek_time() not allowed on stdin')

        # TODO: Maybe these are OK, as long as 'end' is defined as the
        # point where read() returns None for the first time.
        if self.tail and origin == 'end':
            raise ValueError('tail=True incompatible with origin == "end"')
        if self.refresh_file_spec and origin == 'end':
            raise ValueError(
                'refresh_file_spec=True incompatible with origin == "end"')

        if origin == 'start':
            if offset < 0:
                raise ValueError("Can't back up past earliest record")
            first_timestamp = self._get_first_msec_timestamp()
            if first_timestamp is None:
                return None
            desired_time = first_timestamp + offset
            # Only rewind when we may already be at or past the target;
            # otherwise scanning forward from here is enough.
            if self.prev_record is None:
                self._reset()
            else:
                prev_timestamp = self._get_msec_timestamp(self.prev_record)
                if prev_timestamp >= desired_time:
                    self._reset()
            self._read_until(desired_time)
            return desired_time

        elif origin == 'current':
            next_timestamp = self._peek_msec()
            if next_timestamp is not None:
                # Explicit None test: a timestamp of 0 is a valid value.
                curr_timestamp = next_timestamp
            elif self.prev_record is not None:
                curr_timestamp = self._get_msec_timestamp(self.prev_record)
            else:
                # No next record and nothing read so far: there are no
                # timestamps to be relative to.
                return None
            desired_time = curr_timestamp + offset
            if offset == 0:
                return desired_time
            if offset < 0:
                self._reset()
            self._read_until(desired_time)
            return desired_time

        elif origin == 'end':
            # Read through to the end so that prev_record holds the last
            # record.
            while self.read() is not None:
                pass
            if self.prev_record is None:
                return None
            end_timestamp = self._get_msec_timestamp(self.prev_record)
            desired_time = end_timestamp + offset
            if offset < 0:
                self._reset()
                self._read_until(desired_time)
            return desired_time

        else:
            raise ValueError('Unknown origin value: "%s"' % origin)

    ############################
    def read_time_range(self, start=None, stop=None):
        """Read a range of records beginning with timestamp start
        milliseconds, and ending *before* timestamp stop milliseconds.

        Args:
          start: timestamp in msec of the earliest record wanted; None
            means start from the first record.
          stop: timestamp in msec at which to stop (exclusive); None
            means read to the end.

        Returns:
          List of records; empty if there are no records at all.

        Raises:
          ValueError: if reading from stdin; if stop is None while tail
            or refresh_file_spec is set; or (via seek_time()) if start is
            earlier than the first record's timestamp.

        Note that the first record at or past stop, if any, has already
        been consumed from the underlying reader when this returns.
        """
        if self.filebase is None:
            raise ValueError('read_time_range() not allowed on stdin')

        # TODO: Is this needed? stop=None would be OK unless records are
        # being written faster than they're being read.
        if stop is None:
            if self.tail:
                raise ValueError('tail=True incompatible with stop=None')
            if self.refresh_file_spec:
                raise ValueError(
                    'refresh_file_spec=True incompatible with stop=None')

        if start is None:
            starting_offset = 0
        else:
            first_timestamp = self._get_first_msec_timestamp()
            if first_timestamp is None:
                # No records at all, so nothing can fall in the range.
                return []
            starting_offset = start - first_timestamp

        self.seek_time(starting_offset, 'start')
        records = []
        while True:
            record = self.read()
            if record is None:
                break
            # Explicit None test so that stop=0 is honored as a bound.
            if stop is not None and self._get_msec_timestamp(record) >= stop:
                break
            records.append(record)
        return records
def test_seek_position_unchanged_after_error(self):
    # A seek that raises must leave the read position where it was.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            contents = SAMPLE_DATA[name]
            create_file(tmpdirname + '/' + name, contents)
            all_lines.extend(contents)

        reader = TextFileReader(tmpdirname + '/f*')

        # Each case: park the reader at `position`, attempt an
        # out-of-bounds seek, then confirm the next read still comes
        # from `position`.
        cases = (
            (5, -8, 'current'),
            (2, -1, 'start'),
            (7, -10, 'end'),
        )
        for position, bad_offset, origin in cases:
            reader.seek(position, 'start')
            with self.assertRaises(ValueError):
                reader.seek(bad_offset, origin)
            self.assertEqual(all_lines[position], reader.read())
def test_seek_multiple(self):
    # A sequence of seeks from different origins, interleaved with reads.
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            path = tmpdirname + '/' + name
            create_file(path, SAMPLE_DATA[name])
            all_lines += SAMPLE_DATA[name]

        reader = TextFileReader(tmpdirname + '/f*')

        position = reader.seek(8, 'start')
        self.assertEqual(8, position)
        self.assertEqual(all_lines[8], reader.read())

        # Now at 9; four back is 5.
        position = reader.seek(-4, 'current')
        self.assertEqual(5, position)
        self.assertEqual(all_lines[5], reader.read())

        # Two from the end, then read through to EOF.
        position = reader.seek(-2, 'end')
        self.assertEqual(7, position)
        for expected in (all_lines[7], all_lines[8], None):
            self.assertEqual(expected, reader.read())

        position = reader.seek(-7, 'end')
        self.assertEqual(2, position)
        self.assertEqual(all_lines[2], reader.read())

        # Now at 3; five forward is 8.
        position = reader.seek(5, 'current')
        self.assertEqual(8, position)
        self.assertEqual(all_lines[8], reader.read())
def test_position_after_read_range(self):
    # After read_range(start, stop) the next read() should return the
    # record at index `stop` (or None when stop is the end).
    with tempfile.TemporaryDirectory() as tmpdirname:
        logging.info('created temporary directory "%s"', tmpdirname)

        all_lines = []
        for name in sorted(SAMPLE_DATA):
            contents = SAMPLE_DATA[name]
            create_file(tmpdirname + '/' + name, contents)
            all_lines += contents

        reader = TextFileReader(tmpdirname + '/f*')

        reader.read_range(1, 6)
        following = reader.read()
        self.assertEqual(all_lines[6], following)

        reader.read_range(0, 4)
        following = reader.read()
        self.assertEqual(all_lines[4], following)

        # A range ending at the last record leaves nothing to read.
        reader.read_range(7, 9)
        following = reader.read()
        self.assertEqual(None, following)