def find_last_sample_timestamp(pv_name, out_dir, gran, delimiters):
    """Return the timestamp of the newest archived sample for a PV.

    Scans the PV's existing data files (exported with time granularity
    ``gran``) from newest to oldest and returns a tuple
    ``(year, secondsintoyear, nano)`` for the last sample found, or ``None``
    if there are no samples at all.

    Raises FindLastSampleError if an existing file's time suffix does not
    have the number of components implied by ``gran`` (i.e. the existing
    export used a different granularity).
    """
    # Get the directory path and the prefix of data files.
    dir_path, file_prefix = pb_filepath.get_dir_and_prefix(out_dir, delimiters, pv_name)

    # Collect the time suffixes of existing data files for this PV.
    # A missing directory simply means nothing has been exported yet.
    try:
        time_suffixes = list(pb_filepath.filter_filenames(os.listdir(dir_path), file_prefix))
    except OSError as e:
        if e.errno == errno.ENOENT:
            time_suffixes = []
        else:
            raise

    # Split time suffixes into integer components, but keep the original
    # suffixes around. NOTE: list comprehensions are used instead of map()
    # because on Python 3 map() returns a single-use iterator: the sanity
    # check loop below would exhaust it (so sorted() would see nothing), and
    # len() on the inner map object would raise TypeError.
    time_suffixes = [
        {'suffix': suffix, 'ints': [int(comp) for comp in suffix.split('_')]}
        for suffix in time_suffixes
    ]

    # Sanity check number of components.
    num_comps = gran.suffix_count()
    for x in time_suffixes:
        if len(x['ints']) != num_comps:
            raise FindLastSampleError(('Unexpected number of time suffix components: {0}. '
                                       'You are trying to export data using a different time granularity '
                                       'than it was used for already exported data.').format(x['suffix']))

    # Sort suffixes chronologically (component-wise integer order).
    time_suffixes.sort(key=lambda x: x['ints'])

    # Have no suffixes? Then there are no samples at all.
    if not time_suffixes:
        return None

    # Look through the files from newest to oldest.
    for suffix in reversed(time_suffixes):
        # Make the file path.
        file_path = pb_filepath.get_path_for_suffix(out_dir, delimiters, pv_name, suffix['suffix'])

        # Go through this file.
        with open(file_path, 'rb') as stream:
            results = pb_verify.verify_stream(stream, pv_name=pv_name)

        # If any samples were found in this file, the last timestamp in the
        # file is what we're looking for. Else continue looking into the previous file.
        if results['last_timestamp'] is not None:
            year = results['year']
            secondsintoyear, nano = results['last_timestamp']
            return (year, secondsintoyear, nano)

    # No samples found in any file.
    return None
def write_sample(self, sample_pb, dt_seconds, nanoseconds, pb_type): """ Determines the appropriate file for the sample (based on the timestamp) and writes the given sample into a file.""" # Extract the number of seconds into the year. This should be exact. td = dt_seconds - datetime.datetime(dt_seconds.year, 1, 1) into_year_sec_fp = td.seconds + td.days * 24 * 3600 into_year_sec = int(into_year_sec_fp) sample_ts = (into_year_sec, nanoseconds) # Ignore sample if requested by the lower bound. if self._ignore_ts_start is not None: if (dt_seconds.year, into_year_sec, nanoseconds) <= self._ignore_ts_start: self._pvlog.ignored_initial_sample() return # Write timestamp to sample. sample_pb.secondsintoyear, sample_pb.nano = sample_ts # Serialize sample. sample_serialized = sample_pb.SerializeToString() # If this sample does not belong to the currently opened file, close the file. # Note that it's ok to use dt_seconds here since we don't support sub-second granularity. # Same goes for the get_segment_for_time call below. if self._cur_file is not None and not (self._cur_start <= dt_seconds < self._cur_end): self._cur_file.close() self._cur_file = None # Need to open a file? if self._cur_file is None: # Determine the segment for this sample. segment = self._gran.get_segment_for_time(dt_seconds) self._cur_start = segment.start_time() self._cur_end = segment.next_segment().start_time() # Sanity check the segment bounds. assert self._cur_start <= dt_seconds < self._cur_end # Determine the path of the file. self._cur_path = pb_filepath.get_path_for_suffix( self._out_dir, self._delimiters, self._pv_name, segment.file_suffix() ) pb_filepath.make_sure_path_exists(os.path.dirname(self._cur_path)) self._pvlog.info("File: {0}".format(self._cur_path)) # Open file. This creates the file if it does not exist, # and the the cursor is set to the *end*. self._cur_file = open(self._cur_path, "a+b") # Seek to the beginning. 
self._cur_file.seek(0, 0) # We fail if we found samples newer than this one in the file. upper_ts_bound = sample_ts # Verify any existing contents of the file. try: pb_verify.verify_stream( self._cur_file, pb_type=pb_type, pv_name=self._pv_name, year=dt_seconds.year, upper_ts_bound=upper_ts_bound, ) except pb_verify.VerificationError as e: self._pvlog.error("Verification failed: {0}: {1}".format(self._cur_path, e)) self._cur_file.close() self._cur_file = None return # raise AppenderError('Verification failed: {0}: {1}'.format(self._cur_path, e)) except pb_verify.EmptyFileError: # Build header. header_pb = pbt.PayloadInfo() header_pb.type = pb_type header_pb.pvname = self._pv_name header_pb.year = dt_seconds.year # Write header. Note that since there was no header we are still at the start of the file. self._cur_file.write(pb_escape.escape_line(header_pb.SerializeToString())) # Finally write the sample. self._cur_file.write(pb_escape.escape_line(sample_serialized)) self._pvlog.archived_sample()